001package squidpony;
002
003import regexodus.Matcher;
004import regexodus.Pattern;
005import regexodus.Replacer;
006import squidpony.squidmath.CrossHash;
007import squidpony.squidmath.NumberTools;
008
009import java.util.ArrayList;
010import java.util.Collection;
011import java.util.Iterator;
012import java.util.List;
013
014/**
015 * Various utility functions for dealing with Strings, CharSequences, and char[]s; this has lots of methods to convert
016 * to and from Strings and numbers, but also has tools to wrap long CharSequences to fit in a maximum width, join arrays
017 * of various items into long Strings, split/search/count occurrences of literal char arrays or CharSequences without
018 * using any regex, and generally tidy up generated text. This last step includes padding left and right (including a
019 * "strict" option that truncates Strings that are longer than the padded size), Capitalizing Each Word, Capitalizing
020 * the first word in a sentence, replacing "a improper usage of a" with "an improved replacement using an," etc. This
021 * also has a lot of predefined categories of chars that are considered widely enough supported in fonts, like
022 * {@link #COMMON_PUNCTUATION} and {@link #LATIN_LETTERS_UPPER}.
023 * <br>
024 * Created by Tommy Ettinger on 3/21/2016.
025 */
026public class StringKit {
027    /**
028     * Searches text for the exact contents of the char array search; returns true if text contains search.
029     * @param text a CharSequence, such as a String or StringBuilder, that might contain search
030     * @param search a char array to try to find in text
031     * @return true if search was found
032     */
033    public static boolean contains(CharSequence text, char[] search) {
034        return !(text == null || text.length() == 0 || search == null || search.length <= 0)
035                && containsPart(text, search, "", "") == search.length;
036    }
037
038    /**
039     * Tries to find as much of the char array {@code search} in the CharSequence {@code text}, always starting from the
040     * beginning of search (if the beginning isn't found, then it finds nothing), and returns the length of the found
041     * part of search (0 if not found).
042     * @param text a CharSequence to search in
043     * @param search a char array to look for
044     * @return the length of the searched-for char array that was found
045     */
046    public static int containsPart(CharSequence text, char[] search)
047    {
048        return containsPart(text, search, "", "");
049    }
050
051    /**
052     * Tries to find as much of the sequence {@code prefix search suffix} as it can in text, where prefix and suffix are
053     * CharSequences for some reason and search is a char array. Returns the length of the sequence it was able to
054     * match, up to {@code prefix.length() + search.length + suffix.length()}, or 0 if no part of the looked-for
055     * sequence could be found.
056     * <br>
057     * This is almost certainly too specific to be useful outside of a handful of cases.
058     * @param text a CharSequence to search in
059     * @param search a char array to look for, surrounded by prefix and suffix
060     * @param prefix a mandatory prefix before search, separated for some weird optimization reason
061     * @param suffix a mandatory suffix after search, separated for some weird optimization reason
062     * @return the length of the searched-for prefix+search+suffix that was found
063     */
064    public static int containsPart(CharSequence text, char[] search, CharSequence prefix, CharSequence suffix)
065    {
066        if(prefix == null) prefix = "";
067        if(suffix == null) suffix = "";
068        int bl = prefix.length(), el = suffix.length();
069        if(text == null || text.length() == 0 || search == null || (search.length + bl + el <= 0))
070            return 0;
071        int sl = bl + search.length + el, tl = text.length() - sl, f = 0, sl2 = sl - el;
072        char s = (bl <= 0) ? (search.length <= 0 ? suffix.charAt(0) : search[0]) : prefix.charAt(0);
073        PRIMARY:
074        for (int i = 0; i <= tl; i++) {
075            if(text.charAt(i) == s)
076            {
077                for (int j = i+1, x = 1; x < sl; j++, x++) {
078                    if(x < bl)
079                    {
080                        if (text.charAt(j) != prefix.charAt(x)) {
081                            f = Math.max(f, x);
082                            continue PRIMARY;
083                        }
084                    }
085                    else if(x < sl2)
086                    {
087                        if (text.charAt(j) != search[x-bl]) {
088                            f = Math.max(f, x);
089                            continue PRIMARY;
090                        }
091                    }
092                    else
093                    {
094                        if (text.charAt(j) != suffix.charAt(x - sl2)) {
095                            f = Math.max(f, x);
096                            continue PRIMARY;
097                        }
098                    }
099                }
100                return sl;
101            }
102        }
103        return f;
104    }
105
106    public static String join(CharSequence delimiter, CharSequence... elements) {
107        if (elements == null || elements.length == 0) return "";
108        StringBuilder sb = new StringBuilder(64);
109        sb.append(elements[0]);
110        for (int i = 1; i < elements.length; i++) {
111            sb.append(delimiter).append(elements[i]);
112        }
113        return sb.toString();
114    }
115
116    public static String join(CharSequence delimiter, Collection<? extends CharSequence> elements) {
117        if (elements == null || elements.isEmpty()) return "";
118        StringBuilder sb = new StringBuilder(64);
119        Iterator<? extends CharSequence> it = elements.iterator();
120        sb.append(it.next());
121        while(it.hasNext()) {
122            sb.append(delimiter).append(it.next());
123        }
124        return sb.toString();
125    }
126
127    public static String joinArrays(CharSequence delimiter, char[]... elements) {
128        if (elements == null || elements.length == 0) return "";
129        StringBuilder sb = new StringBuilder(64);
130        sb.append(elements[0]);
131        for (int i = 1; i < elements.length; i++) {
132            sb.append(delimiter).append(elements[i]);
133        }
134        return sb.toString();
135    }
136
137    public static String join(CharSequence delimiter, long... elements) {
138        if (elements == null || elements.length == 0) return "";
139        StringBuilder sb = new StringBuilder(64);
140        sb.append(elements[0]);
141        for (int i = 1; i < elements.length; i++) {
142            sb.append(delimiter).append(elements[i]);
143        }
144        return sb.toString();
145    }
146    public static String join(CharSequence delimiter, double... elements) {
147        if (elements == null || elements.length == 0) return "";
148        StringBuilder sb = new StringBuilder(64);
149        sb.append(elements[0]);
150        for (int i = 1; i < elements.length; i++) {
151            sb.append(delimiter).append(elements[i]);
152        }
153        return sb.toString();
154    }
155    public static String join(CharSequence delimiter, int... elements) {
156        if (elements == null || elements.length == 0) return "";
157        StringBuilder sb = new StringBuilder(64);
158        sb.append(elements[0]);
159        for (int i = 1; i < elements.length; i++) {
160            sb.append(delimiter).append(elements[i]);
161        }
162        return sb.toString();
163    }
164    public static String join(CharSequence delimiter, float... elements) {
165        if (elements == null || elements.length == 0) return "";
166        StringBuilder sb = new StringBuilder(64);
167        sb.append(elements[0]);
168        for (int i = 1; i < elements.length; i++) {
169            sb.append(delimiter).append(elements[i]);
170        }
171        return sb.toString();
172    }
173    public static String join(CharSequence delimiter, short... elements) {
174        if (elements == null || elements.length == 0) return "";
175        StringBuilder sb = new StringBuilder(64);
176        sb.append(elements[0]);
177        for (int i = 1; i < elements.length; i++) {
178            sb.append(delimiter).append(elements[i]);
179        }
180        return sb.toString();
181    }
182    public static String join(CharSequence delimiter, char... elements) {
183        if (elements == null || elements.length == 0) return "";
184        StringBuilder sb = new StringBuilder(64);
185        sb.append(elements[0]);
186        for (int i = 1; i < elements.length; i++) {
187            sb.append(delimiter).append(elements[i]);
188        }
189        return sb.toString();
190    }
191    public static String join(CharSequence delimiter, byte... elements) {
192        if (elements == null || elements.length == 0) return "";
193        StringBuilder sb = new StringBuilder(64);
194        sb.append(elements[0]);
195        for (int i = 1; i < elements.length; i++) {
196            sb.append(delimiter).append(elements[i]);
197        }
198        return sb.toString();
199    }
200    public static String join(CharSequence delimiter, boolean... elements) {
201        if (elements == null || elements.length == 0) return "";
202        StringBuilder sb = new StringBuilder(64);
203        sb.append(elements[0]);
204        for (int i = 1; i < elements.length; i++) {
205            sb.append(delimiter).append(elements[i]);
206        }
207        return sb.toString();
208    }
209
210    /**
211     * Joins the items in {@code elements} by calling their toString method on them (or just using the String "null" for
212     * null items), and separating each item with {@code delimiter}. Unlike other join methods in this class, this does
213     * not take a vararg of Object items, since that would cause confusion with the overloads that take one object, such
214     * as {@link #join(CharSequence, Iterable)}; it takes a non-vararg Object array instead.
215     * @param delimiter the String or other CharSequence to separate items in elements with
216     * @param elements the Object items to stringify and join into one String; if the array is null or empty, this
217     *                 returns an empty String, and if items are null, they are shown as "null"
218     * @return the String representations of the items in elements, separated by delimiter and put in one String
219     */
220    public static String join(CharSequence delimiter, Object[] elements) {
221        if (elements == null || elements.length == 0) return "";
222        StringBuilder sb = new StringBuilder(64);
223        sb.append(elements[0]);
224        for (int i = 1; i < elements.length; i++) {
225            sb.append(delimiter).append(elements[i]);
226        }
227        return sb.toString();
228    }
229    /**
230     * Joins the items in {@code elements} by calling their toString method on them (or just using the String "null" for
231     * null items), and separating each item with {@code delimiter}. This can take any Iterable of any type for its
232     * elements parameter.
233     * @param delimiter the String or other CharSequence to separate items in elements with
234     * @param elements the Object items to stringify and join into one String; if Iterable is null or empty, this
235     *                 returns an empty String, and if items are null, they are shown as "null"
236     * @return the String representations of the items in elements, separated by delimiter and put in one String
237     */
238    public static String join(CharSequence delimiter, Iterable<?> elements) {
239        if (elements == null) return "";
240        Iterator<?> it = elements.iterator();
241        if(!it.hasNext()) return "";
242        StringBuilder sb = new StringBuilder(64);
243        sb.append(it.next());
244        while(it.hasNext()) {
245            sb.append(delimiter).append(it.next());
246        }
247        return sb.toString();
248    }
249
250    /**
251     * Joins the boolean array {@code elements} without delimiters into a String, using "1" for true and "0" for false.
252     * @param elements an array or vararg of booleans
253     * @return a String using 1 for true elements and 0 for false, or "N" if elements is null
254     */
255    public static String joinAlt(boolean... elements) {
256        if (elements == null) return "N";
257        if(elements.length == 0) return "";
258        StringBuilder sb = new StringBuilder(64);
259        for (int i = 0; i < elements.length; i++) {
260            sb.append(elements[i] ? '1' : '0');
261        }
262        return sb.toString();
263    }
264
265    /**
266     * Like {@link #join(CharSequence, long...)}, but this appends an 'L' to each number so they can be read in by Java.
267     * @param delimiter
268     * @param elements
269     * @return
270     */
271    public static String joinAlt(CharSequence delimiter, long... elements) {
272        if (elements == null || elements.length == 0) return "";
273        StringBuilder sb = new StringBuilder(elements.length << 2);
274        sb.append(elements[0]).append('L');
275        for (int i = 1; i < elements.length; i++) {
276            sb.append(delimiter).append(elements[i]).append('L');
277        }
278        return sb.toString();
279    }
280    /**
281     * Scans repeatedly in {@code source} for the String {@code search}, not scanning the same char twice except as part
282     * of a larger String, and returns the number of instances of search that were found, or 0 if source is null or if
283     * search is null or empty.
284     * @param source a String to look through
285     * @param search a String to look for
286     * @return the number of times search was found in source
287     */
288    public static int count(final String source, final String search)
289    {
290        if(source == null || search == null || source.isEmpty() || search.isEmpty())
291            return 0;
292        int amount = 0, idx = -1;
293        while ((idx = source.indexOf(search, idx+1)) >= 0)
294            ++amount;
295        return amount;
296    }
297
298    /**
299     * Scans repeatedly in {@code source} for the codepoint {@code search} (which is usually a char literal), not
300     * scanning the same section twice, and returns the number of instances of search that were found, or 0 if source is
301     * null.
302     * @param source a String to look through
303     * @param search a codepoint or char to look for
304     * @return the number of times search was found in source
305     */
306    public static int count(final String source, final int search)
307    {
308        if(source == null || source.isEmpty())
309            return 0;
310        int amount = 0, idx = -1;
311        while ((idx = source.indexOf(search, idx+1)) >= 0)
312            ++amount;
313        return amount;
314    }
315    /**
316     * Scans repeatedly in {@code source} (only using the area from startIndex, inclusive, to endIndex, exclusive) for
317     * the String {@code search}, not scanning the same char twice except as part of a larger String, and returns the
318     * number of instances of search that were found, or 0 if source or search is null or if the searched area is empty.
319     * If endIndex is negative, this will search from startIndex until the end of the source.
320     * @param source a String to look through
321     * @param search a String to look for
322     * @param startIndex the first index to search through, inclusive
323     * @param endIndex the last index to search through, exclusive; if negative this will search the rest of source
324     * @return the number of times search was found in source
325     */
326    public static int count(final String source, final String search, final int startIndex, int endIndex)
327    {
328        if(endIndex < 0) endIndex = 0x7fffffff;
329        if(source == null || search == null || source.isEmpty() || search.isEmpty()
330                || startIndex < 0 || startIndex >= endIndex)
331            return 0;
332        int amount = 0, idx = startIndex-1;
333        while ((idx = source.indexOf(search, idx+1)) >= 0 && idx < endIndex)
334            ++amount;
335        return amount;
336    }
337
338    /**
339     * Scans repeatedly in {@code source} (only using the area from startIndex, inclusive, to endIndex, exclusive) for
340     * the codepoint {@code search} (which is usually a char literal), not scanning the same section twice, and returns
341     * the number of instances of search that were found, or 0 if source is null or if the searched area is empty.
342     * If endIndex is negative, this will search from startIndex until the end of the source.
343     * @param source a String to look through
344     * @param search a codepoint or char to look for
345     * @param startIndex the first index to search through, inclusive
346     * @param endIndex the last index to search through, exclusive; if negative this will search the rest of source
347     * @return the number of times search was found in source
348     */
349    public static int count(final String source, final int search, final int startIndex, int endIndex)
350    {
351        if(endIndex < 0) endIndex = 0x7fffffff;
352        if(source == null || source.isEmpty() || startIndex < 0 || startIndex >= endIndex)
353            return 0;
354        int amount = 0, idx = startIndex-1;
355        while ((idx = source.indexOf(search, idx+1)) >= 0 && idx < endIndex)
356            ++amount;
357        return amount;
358    }
359
360    /**
361     * Like {@link String#substring(int, int)} but returns "" instead of throwing any sort of Exception.
362     * @param source the String to get a substring from
363     * @param beginIndex the first index, inclusive; will be treated as 0 if negative
364     * @param endIndex the index after the last character (exclusive); if negative this will be source.length()
365     * @return the substring of source between beginIndex and endIndex, or "" if any parameters are null/invalid
366     */
367    public static String safeSubstring(String source, int beginIndex, int endIndex)
368    {
369        if(source == null || source.isEmpty()) return "";
370        if(beginIndex < 0) beginIndex = 0;
371        if(endIndex < 0 || endIndex > source.length()) endIndex = source.length();
372        if(beginIndex > endIndex) return "";
373        return source.substring(beginIndex, endIndex);
374    }
375
376    /**
377     * Like {@link String#split(String)} but doesn't use any regex for splitting (delimiter is a literal String).
378     * @param source the String to get split-up substrings from
379     * @param delimiter the literal String to split on (not a regex); will not be included in the returned String array
380     * @return a String array consisting of at least one String (all of Source if nothing was split)
381     */
382    public static String[] split(String source, String delimiter) {
383        int amount = count(source, delimiter);
384        if (amount <= 0) return new String[]{source};
385        String[] splat = new String[amount+1];
386        int dl = delimiter.length(), idx = -dl, idx2;
387        for (int i = 0; i < amount; i++) {
388            splat[i] = safeSubstring(source, idx+dl, idx = source.indexOf(delimiter, idx+dl));
389        }
390        if((idx2 = source.indexOf(delimiter, idx+dl)) < 0)
391        {
392            splat[amount] = safeSubstring(source, idx+dl, source.length());
393        }
394        else
395        {
396            splat[amount] = safeSubstring(source, idx+dl, idx2);
397        }
398        return splat;
399    }
400
    /**
     * Strings made up only of '0' chars; the bin() overloads in this class take a leading substring of one of these
     * to left-pad a binary String. Each is named for the number of binary digits it is used to pad to.
     */
    public static final String mask64 = "0000000000000000000000000000000000000000000000000000000000000000",
            mask32 = "00000000000000000000000000000000",
            mask16 = "0000000000000000",
            mask8 = "00000000";
405
    // keyBase64 holds the chars A-Z, a-z, 0-9, '+', '/', and '=' in that order.
    // valBase64 appears to be the reverse lookup, indexed by char code and giving that char's position in
    // keyBase64 (0 for chars that are not in it) -- NOTE(review): confirm against the code that reads these tables,
    // which is not in this part of the file.
    private static final char[] keyBase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=".toCharArray(),
            valBase64 = new char[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    62, 0, 0, 0, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 64, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
                    0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0};
410
    // A 16-char scratch buffer, pre-filled with '0' chars. This is one shared static instance and StringBuilder is
    // not synchronized, so any code that writes into it must not run on more than one thread at a time.
    private static final StringBuilder hexBuilder = new StringBuilder(16).append("0000000000000000");
412    public static String hex(long number) {
413        for (int i = 0; i < 16; i++) {
414            hexBuilder.setCharAt(15 - i, hexDigits[(int)(number >> (i << 2) & 15)]);
415        }
416        return hexBuilder.toString();
417    }
418
419    public static String hex(double number) {
420        // avoids creating temporary long values, which can be slow on GWT
421        int h = NumberTools.doubleToLowIntBits(number);
422        for (int i = 0; i < 8; i++) {
423            hexBuilder.setCharAt(15 - i, hexDigits[(h >> (i << 2) & 15)]);
424        }
425        h = NumberTools.doubleToHighIntBits(number);
426        for (int i = 0; i < 8; i++) {
427            hexBuilder.setCharAt(7 - i, hexDigits[(h >> (i << 2) & 15)]);
428        }
429        return hexBuilder.toString();
430    }
431
432    public static String hex(int number) {
433        for (int i = 0; i < 8; i++) {
434            hexBuilder.setCharAt(7 - i, hexDigits[(number >> (i << 2) & 15)]);
435        }
436        return hexBuilder.substring(0, 8);
437    }
438    
439    public static String hex(float number) {
440        final int h = NumberTools.floatToIntBits(number);
441        for (int i = 0; i < 8; i++) {
442            hexBuilder.setCharAt(7 - i, hexDigits[(h >> (i << 2) & 15)]);
443        }
444        return hexBuilder.substring(0, 8);
445    }
446
447    public static String hex(short number) {
448        for (int i = 0; i < 4; i++) {
449            hexBuilder.setCharAt(3 - i, hexDigits[(number >> (i << 2) & 15)]);
450        }
451        return hexBuilder.substring(0, 4);
452    }
453
454    public static String hex(char number) {
455        for (int i = 0; i < 4; i++) {
456            hexBuilder.setCharAt(3 - i, hexDigits[(number >> (i << 2) & 15)]);
457        }
458        return hexBuilder.substring(0, 4);
459    }
460
461    public static String hex(byte number) {
462        hexBuilder.setCharAt(0, hexDigits[(number >> 4 & 15)]);
463        hexBuilder.setCharAt(1, hexDigits[(number & 15)]);
464        return hexBuilder.substring(0, 2);
465    }
466
467    public static StringBuilder appendHex(StringBuilder builder, long number){
468        for (int i = 60; i >= 0; i -= 4) {
469            builder.append(hexDigits[(int)(number >> i & 15)]);
470        }
471        return builder;
472    }
473    public static StringBuilder appendHex(StringBuilder builder, double number){
474        // avoids creating temporary long values, which can be slow on GWT
475        int h = NumberTools.doubleToHighIntBits(number);
476        for (int i = 28; i >= 0; i -= 4) {
477            builder.append(hexDigits[(h >> i & 15)]);
478        }
479        h = NumberTools.doubleToLowIntBits(number);
480        for (int i = 28; i >= 0; i -= 4) {
481            builder.append(hexDigits[(h >> i & 15)]);
482        }
483        return builder;
484    }
485    public static StringBuilder appendHex(StringBuilder builder, int number){
486        for (int i = 28; i >= 0; i -= 4) {
487            builder.append(hexDigits[(number >> i & 15)]);
488        }
489        return builder;
490    }
491    public static StringBuilder appendHex(StringBuilder builder, float number){
492        final int h = NumberTools.floatToIntBits(number);
493        for (int i = 28; i >= 0; i -= 4) {
494            builder.append(hexDigits[(h >> i & 15)]);
495        }
496        return builder;
497    }
498    public static StringBuilder appendHex(StringBuilder builder, short number){
499        for (int i = 12; i >= 0; i -= 4) {
500            builder.append(hexDigits[(number >> i & 15)]);
501        }
502        return builder;
503    }
504    public static StringBuilder appendHex(StringBuilder builder, char number){
505        for (int i = 12; i >= 0; i -= 4) {
506            builder.append(hexDigits[(number >> i & 15)]);
507        }
508        return builder;
509    }
510    public static StringBuilder appendHex(StringBuilder builder, byte number){
511        builder.append(hexDigits[(number >> 4 & 15)]);
512        builder.append(hexDigits[(number & 15)]);
513        return builder;
514    }
515
516    public static String hex(long[] numbers) {
517        int len;
518        if (numbers == null || (len = numbers.length) <= 0) return "";
519        StringBuilder sb = new StringBuilder(numbers.length << 4);
520        for (int i = 0; i < len; i++) {
521            appendHex(sb, numbers[i]);
522        }
523        return sb.toString();
524    }
525
526    public static String hex(double[] numbers) {
527        int len;
528        if (numbers == null || (len = numbers.length) <= 0) return "";
529        StringBuilder sb = new StringBuilder(numbers.length << 4);
530        for (int i = 0; i < len; i++) {
531            appendHex(sb, numbers[i]);
532        }
533        return sb.toString();
534    }
535
536    public static String hex(int[] numbers) {
537        int len;
538        if (numbers == null || (len = numbers.length) <= 0) return "";
539        StringBuilder sb = new StringBuilder(numbers.length << 3);
540        for (int i = 0; i < len; i++) {
541            appendHex(sb, numbers[i]);
542        }
543        return sb.toString();
544    }
545
546
547    public static String hex(float[] numbers) {
548        int len;
549        if (numbers == null || (len = numbers.length) <= 0) return "";
550        StringBuilder sb = new StringBuilder(numbers.length << 3);
551        for (int i = 0; i < len; i++) {
552            appendHex(sb, numbers[i]);
553        }
554        return sb.toString();
555    }
556
557    public static String hex(short[] numbers) {
558        int len;
559        if (numbers == null || (len = numbers.length) <= 0) return "";
560        StringBuilder sb = new StringBuilder(numbers.length << 2);
561        for (int i = 0; i < len; i++) {
562            appendHex(sb, numbers[i]);
563        }
564        return sb.toString();
565    }
566
567    public static String hex(char[] numbers) {
568        int len;
569        if (numbers == null || (len = numbers.length) <= 0) return "";
570        StringBuilder sb = new StringBuilder(numbers.length << 2);
571        for (int i = 0; i < len; i++) {
572            appendHex(sb, numbers[i]);
573        }
574        return sb.toString();
575    }
576
577    public static String hex(byte[] numbers) {
578        int len;
579        if (numbers == null || (len = numbers.length) <= 0) return "";
580        StringBuilder sb = new StringBuilder(numbers.length << 1);
581        for (int i = 0; i < len; i++) {
582            appendHex(sb, numbers[i]);
583        }
584        return sb.toString();
585    }
586
587    public static String bin(long number) {
588        String h = Long.toBinaryString(number);
589        return mask64.substring(0, 64 - h.length()) + h;
590    }
591
592    public static String bin(int number) {
593        String h = Integer.toBinaryString(number);
594        return mask32.substring(0, 32 - h.length()) + h;
595    }
596
597    public static String bin(short number) {
598        String h = Integer.toBinaryString(number & 0xffff);
599        return mask16.substring(0, 16 - h.length()) + h;
600    }
601
602    public static String bin(char number) {
603        String h = Integer.toBinaryString(number & 0xffff);
604        return mask16.substring(0, 16 - h.length()) + h;
605    }
606
607    public static String bin(byte number) {
608        String h = Integer.toBinaryString(number & 0xff);
609        return mask8.substring(0, 8 - h.length()) + h;
610    }
    // Lookup table indexed by char code, covering codes 0 through 102 ('f'): holds 0-9 for '0'-'9', 10-15 for
    // 'A'-'F' and for 'a'-'f', and -1 for every other char. The table stops at 'f', so a char must be checked to
    // be no greater than 102 before it is used as an index here.
    private static final int[] hexCodes = new int[]
            {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
             -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
             -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
              0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
             -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
             -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
             -1,10,11,12,13,14,15};
619
620    /**
621     * Reads in a CharSequence containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at the start
622     * and returns the long they represent, reading at most 16 characters (17 if there is a sign) and returning the
623     * result if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also
624     * represent negative numbers as they are printed by such methods as String.format given a %x in the formatting
625     * string, or this class' {@link #hex(long)} method; that is, if the first char of a 16-char (or longer)
626     * CharSequence is a hex digit 8 or higher, then the whole number represents a negative number, using two's
627     * complement and so on. This means "FFFFFFFFFFFFFFFF" would return the long -1 when passed to this, though you
628     * could also simply use "-1 ".
629     * <br>
630     * Should be fairly close to Java 8's Long.parseUnsignedLong method, which is an odd omission from earlier JDKs.
631     * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or
632     * stopping the parse process early if a non-hex-digit char is read before the end of cs is reached. If the parse is
633     * stopped early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger
634     * places.
635     * @param cs a CharSequence, such as a String, containing only hex digits with an optional sign (no 0x at the start)
636     * @return the long that cs represents
637     */
638    public static long longFromHex(final CharSequence cs) {
639        return longFromHex(cs, 0, cs.length());
640    }
641    /**
642     * Reads in a CharSequence containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at the start
643     * and returns the long they represent, reading at most 16 characters (17 if there is a sign) and returning the
644     * result if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also
645     * represent negative numbers as they are printed by such methods as String.format given a %x in the formatting
646     * string, or this class' {@link #hex(long)} method; that is, if the first char of a 16-char (or longer)
647     * CharSequence is a hex digit 8 or higher, then the whole number represents a negative number, using two's
648     * complement and so on. This means "FFFFFFFFFFFFFFFF" would return the long -1 when passed to this, though you
649     * could also simply use "-1 ". If you use both '-' at the start and have the most significant digit as 8 or higher,
650     * such as with "-FFFFFFFFFFFFFFFF", then both indicate a negative number, but the digits will be processed first
651     * (producing -1) and then the whole thing will be multiplied by -1 to flip the sign again (returning 1).
652     * <br>
653     * Should be fairly close to Java 8's Long.parseUnsignedLong method, which is an odd omission from earlier JDKs.
654     * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or
655     * stopping the parse process early if a non-hex-digit char is read before end is reached. If the parse is stopped
656     * early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger places.
657     * @param cs a CharSequence, such as a String, containing only hex digits with an optional sign (no 0x at the start)
658     * @param start the (inclusive) first character position in cs to read
659     * @param end the (exclusive) last character position in cs to read (this stops after 16 characters if end is too large)
660     * @return the long that cs represents
661     */
662    public static long longFromHex(final CharSequence cs, final int start, int end) {
663        int len, h, lim = 16;
664        if (cs == null || start < 0 || end <= 0 || end - start <= 0
665                || (len = cs.length()) - start <= 0 || end > len)
666            return 0;
667        char c = cs.charAt(start);
668        if (c == '-') {
669            len = -1;
670            h = 0;
671            lim = 17;
672        } else if (c == '+') {
673            len = 1;
674            h = 0;
675            lim = 17;
676        } else if (c > 102 || (h = hexCodes[c]) < 0)
677            return 0;
678        else {
679            len = 1;
680        }
681        long data = h;
682        for (int i = start + 1; i < end && i < start + lim; i++) {
683            if ((c = cs.charAt(i)) > 102 || (h = hexCodes[c]) < 0)
684                return data * len;
685            data <<= 4;
686            data |= h;
687        }
688        return data * len;
689    }
690    /**
691     * Reads in a char[] containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at the start and
692     * returns the long they represent, reading at most 16 characters (17 if there is a sign) and returning the result
693     * if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also represent
694     * negative numbers as they are printed by such methods as String.format given a %x in the formatting string, or
695     * this class' {@link #hex(long)} method; that is, if the first digit of a 16-char (or longer) char[] is a hex
696     * digit 8 or higher, then the whole number represents a negative number, using two's complement and so on. This
697     * means "FFFFFFFFFFFFFFFF" would return the long -1L when passed to this, though you could also simply use "-1 ".
698     * If you use both '-' at the start and have the most significant digit as 8 or higher, such as with
699     * "-FFFFFFFFFFFFFFFF", then both indicate a negative number, but the digits will be processed first (producing -1)
700     * and then the whole thing will be multiplied by -1 to flip the sign again (returning 1).
701     * <br>
702     * Should be fairly close to Java 8's Long.parseUnsignedLong method, which is an odd omission from earlier JDKs.
703     * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or
704     * stopping the parse process early if a non-hex-digit char is read before end is reached. If the parse is stopped
705     * early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger places.
706     * @param cs a char array containing only hex digits with an optional sign (no 0x at the start)
707     * @param start the (inclusive) first character position in cs to read
     * @param end the (exclusive) last character position in cs to read (this stops after 16 or 17 characters if end is too large, depending on sign)
709     * @return the long that cs represents
710     */
711    public static long longFromHex(final char[] cs, final int start, int end)
712    {
713        int len, h, lim = 16;
714        if(cs == null || start < 0 || end <=0 || end - start <= 0
715                || (len = cs.length) - start <= 0 || end > len)
716            return 0;
717        char c = cs[start];
718        if(c == '-')
719        {
720            len = -1;
721            h = 0;
722            lim = 17;
723        }
724        else if(c == '+')
725        {
726            len = 1;
727            h = 0;
728            lim = 17;
729        }
730        else if(c > 102 || (h = hexCodes[c]) < 0)
731            return 0;
732        else
733        {
734            len = 1;
735        }
736        int data = h;
737        for (int i = start + 1; i < end && i < start + lim; i++) {
738            if((c = cs[i]) > 102 || (h = hexCodes[c]) < 0)
739                return data * len;
740            data <<= 4;
741            data |= h;
742        }
743        return data * len;
744    }
745
746    /**
747     * Reads in a CharSequence containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at the start
748     * and returns the int they represent, reading at most 8 characters (9 if there is a sign) and returning the result
749     * if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also represent
750     * negative numbers as they are printed by such methods as String.format given a %x in the formatting string, or
751     * this class' {@link #hex(int)} method; that is, if the first digit of an 8-char (or longer) CharSequence is a hex
752     * digit 8 or higher, then the whole number represents a negative number, using two's complement and so on. This
753     * means "FFFFFFFF" would return the int -1 when passed to this, though you could also simply use "-1 ". If you use
754     * both '-' at the start and have the most significant digit as 8 or higher, such as with "-FFFFFFFF", then both
755     * indicate a negative number, but the digits will be processed first (producing -1) and then the whole thing will
756     * be multiplied by -1 to flip the sign again (returning 1).
757     * <br>
758     * Should be fairly close to Java 8's Integer.parseUnsignedInt method, which is an odd omission from earlier JDKs.
759     * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or
760     * stopping the parse process early if a non-hex-digit char is read before the end of cs is reached. If the parse is
761     * stopped early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger
762     * places.
763     * @param cs a CharSequence, such as a String, containing only hex digits with an optional sign (no 0x at the start)
764     * @return the int that cs represents
765     */
766    public static int intFromHex(final CharSequence cs) {
767        return intFromHex(cs, 0, cs.length());
768    }
769    /**
770     * Reads in a CharSequence containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at the start
771     * and returns the int they represent, reading at most 8 characters (9 if there is a sign) and returning the result
772     * if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also represent
773     * negative numbers as they are printed by such methods as String.format given a %x in the formatting string, or
774     * this class' {@link #hex(int)} method; that is, if the first digit of an 8-char (or longer) CharSequence is a hex
775     * digit 8 or higher, then the whole number represents a negative number, using two's complement and so on. This
776     * means "FFFFFFFF" would return the int -1 when passed to this, though you could also simply use "-1 ". If you use
777     * both '-' at the start and have the most significant digit as 8 or higher, such as with "-FFFFFFFF", then both
778     * indicate a negative number, but the digits will be processed first (producing -1) and then the whole thing will
779     * be multiplied by -1 to flip the sign again (returning 1).
780     * <br>
781     * Should be fairly close to Java 8's Integer.parseUnsignedInt method, which is an odd omission from earlier JDKs.
782     * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or
783     * stopping the parse process early if a non-hex-digit char is read before end is reached. If the parse is stopped
784     * early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger places.
785     * @param cs a CharSequence, such as a String, containing only hex digits with an optional sign (no 0x at the start)
786     * @param start the (inclusive) first character position in cs to read
787     * @param end the (exclusive) last character position in cs to read (this stops after 8 or 9 characters if end is too large, depending on sign)
788     * @return the int that cs represents
789     */
790    public static int intFromHex(final CharSequence cs, final int start, int end)
791    {
792        int len, h, lim = 8;
793        if(cs == null || start < 0 || end <=0 || end - start <= 0
794                || (len = cs.length()) - start <= 0 || end > len)
795            return 0;
796        char c = cs.charAt(start);
797        if(c == '-')
798        {
799            len = -1;
800            h = 0;
801            lim = 9;
802        }
803        else if(c == '+')
804        {
805            len = 1;
806            h = 0;
807            lim = 9;
808        }
809        else if(c > 102 || (h = hexCodes[c]) < 0)
810            return 0;
811        else
812        {
813            len = 1;
814        }
815        int data = h;
816        for (int i = start + 1; i < end && i < start + lim; i++) {
817            if((c = cs.charAt(i)) > 102 || (h = hexCodes[c]) < 0)
818                return data * len;
819            data <<= 4;
820            data |= h;
821        }
822        return data * len;
823    }
824    /**
825     * Reads in a char[] containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at the start
826     * and returns the int they represent, reading at most 8 characters (9 if there is a sign) and returning the result
827     * if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also represent
828     * negative numbers as they are printed by such methods as String.format given a %x in the formatting string, or
829     * this class' {@link #hex(int)} method; that is, if the first digit of an 8-char (or longer) char[] is a hex
830     * digit 8 or higher, then the whole number represents a negative number, using two's complement and so on. This
831     * means "FFFFFFFF" would return the int -1 when passed to this, though you could also simply use "-1 ". If you use
832     * both '-' at the start and have the most significant digit as 8 or higher, such as with "-FFFFFFFF", then both
833     * indicate a negative number, but the digits will be processed first (producing -1) and then the whole thing will
834     * be multiplied by -1 to flip the sign again (returning 1).
835     * <br>
836     * Should be fairly close to Java 8's Integer.parseUnsignedInt method, which is an odd omission from earlier JDKs.
837     * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or
838     * stopping the parse process early if a non-hex-digit char is read before end is reached. If the parse is stopped
839     * early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger places.
840     * @param cs a char array containing only hex digits with an optional sign (no 0x at the start)
841     * @param start the (inclusive) first character position in cs to read
842     * @param end the (exclusive) last character position in cs to read (this stops after 8 or 9 characters if end is too large, depending on sign)
843     * @return the int that cs represents
844     */
845    public static int intFromHex(final char[] cs, final int start, int end)
846    {
847        int len, h, lim = 8;
848        if(cs == null || start < 0 || end <=0 || end - start <= 0
849                || (len = cs.length) - start <= 0 || end > len)
850            return 0;
851        char c = cs[start];
852        if(c == '-')
853        {
854            len = -1;
855            h = 0;
856            lim = 9;
857        }
858        else if(c == '+')
859        {
860            len = 1;
861            h = 0;
862            lim = 9;
863        }
864        else if(c > 102 || (h = hexCodes[c]) < 0)
865            return 0;
866        else
867        {
868            len = 1;
869        }
870        int data = h;
871        for (int i = start + 1; i < end && i < start + lim; i++) {
872            if((c = cs[i]) > 102 || (h = hexCodes[c]) < 0)
873                return data * len;
874            data <<= 4;
875            data |= h;
876        }
877        return data * len;
878    }
879    /**
880     * Reads in a CharSequence containing only decimal digits (0-9) with an optional sign at the start and returns the
881     * long they represent, reading at most 19 characters (20 if there is a sign) and returning the result if valid, or
882     * 0 if nothing could be read. The leading sign can be '+' or '-' if present. Unlike
883     * {@link #intFromDec(CharSequence)}, this can't effectively be used to read unsigned longs as decimal literals,
884     * since anything larger than the highest signed long would be larger than the normal limit for longs as text (it
885     * would be 20 characters without a sign, where we limit it to 19 without a sign to match normal behavior).
886     * <br>
887     * Should be fairly close to the JDK's Long.parseLong method, but this also supports CharSequence data instead of
888     * just String data, and ignores chars after the number. This doesn't throw on invalid input, either, instead
889     * returning 0 if the first char is not a decimal digit, or stopping the parse process early if a non-decimal-digit
890     * char is read before the end of cs is reached. If the parse is stopped early, this behaves as you would expect for
891     * a number with less digits, and simply doesn't fill the larger places.
892     * @param cs a CharSequence, such as a String, containing only digits 0-9 with an optional sign
893     * @return the long that cs represents
894     */
895    public static long longFromDec(final CharSequence cs) {
896        return longFromDec(cs,0, cs.length());
897    }
898    /**
899     * Reads in a CharSequence containing only decimal digits (0-9) with an optional sign at the start and returns the
900     * long they represent between the given positions {@code start} and {@code end}, reading at most 19 characters (20
901     * if there is a sign) or until end is reached and returning the result if valid, or 0 if nothing could be read. The
902     * leading sign can be '+' or '-' if present. Unlike {@link #intFromDec(CharSequence, int, int)}, this can't
903     * effectively be used to read unsigned longs as decimal literals, since anything larger than the highest signed
904     * long would be larger than the normal limit for longs as text (it would be 20 characters without a sign, where we
905     * limit it to 19 without a sign to match normal behavior).
906     * <br>
907     * Should be fairly close to the JDK's Long.parseLong method, but this also supports CharSequence data instead of
908     * just String data, and allows specifying a start and end. This doesn't throw on invalid input, either, instead
909     * returning 0 if the first char is not a decimal digit, or stopping the parse process early if a non-decimal-digit
910     * char is read before end is reached. If the parse is stopped early, this behaves as you would expect for a number
911     * with less digits, and simply doesn't fill the larger places.
912     * @param cs a CharSequence, such as a String, containing only digits 0-9 with an optional sign
913     * @param start the (inclusive) first character position in cs to read
914     * @param end the (exclusive) last character position in cs to read (this stops after 19 or 20 characters if end is too large, depending on sign)
915     * @return the long that cs represents
916     */
917    public static long longFromDec(final CharSequence cs, final int start, int end)
918    {
919        int len, h, lim = 19;
920        long sign = 1L;
921        if(cs == null || start < 0 || end <=0 || end - start <= 0
922                || (len = cs.length()) - start <= 0 || end > len)
923            return 0L;
924        char c = cs.charAt(start);
925        if(c == '-')
926        {
927            sign = -1L;
928            lim = 20;
929            h = 0;
930        }
931        else if(c == '+')
932        {
933            lim = 20;
934            h = 0;
935        }
936        else if(c > 102 || (h = hexCodes[c]) < 0 || h > 9)
937            return 0L;
938        long data = h;
939        for (int i = start + 1; i < end && i < start + lim; i++) {
940            if((c = cs.charAt(i)) > 102 || (h = hexCodes[c]) < 0 || h > 9)
941                return data * sign;
942            data = data * 10 + h;
943        }
944        return data * sign;
945    }
946    /**
947     * Reads in a CharSequence containing only decimal digits (0-9) with an optional sign at the start and returns the
948     * int they represent, reading at most 10 characters (11 if there is a sign) and returning the result if valid, or 0
949     * if nothing could be read. The leading sign can be '+' or '-' if present. This can technically be used to handle
950     * unsigned integers in decimal format, but it isn't the intended purpose. If you do use it for handling unsigned
951     * ints, 2147483647 is normally the highest positive int and -2147483648 the lowest negative one, but if you give
952     * this a number between 2147483647 and {@code 2147483647 + 2147483648}, it will interpret it as a negative number
953     * that fits in bounds using the normal rules for converting between signed and unsigned numbers.
954     * <br>
955     * Should be fairly close to the JDK's Integer.parseInt method, but this also supports CharSequence data instead of
956     * just String data, and ignores chars after the number. This doesn't throw on invalid input, either, instead
957     * returning 0 if the first char is not a decimal digit, or stopping the parse process early if a non-decimal-digit
958     * char is read before the end of cs is reached. If the parse is stopped early, this behaves as you would expect for
959     * a number with less digits, and simply doesn't fill the larger places.
960     * @param cs a CharSequence, such as a String, containing only digits 0-9 with an optional sign
961     * @return the int that cs represents
962     */
963    public static int intFromDec(final CharSequence cs) {
964        return intFromDec(cs, 0, cs.length());
965    }
966    /**
967     * Reads in a CharSequence containing only decimal digits (0-9) with an optional sign at the start and returns the
968     * int they represent, reading at most 10 characters (11 if there is a sign) and returning the result if valid, or 0
969     * if nothing could be read. The leading sign can be '+' or '-' if present. This can technically be used to handle
970     * unsigned integers in decimal format, but it isn't the intended purpose. If you do use it for handling unsigned
971     * ints, 2147483647 is normally the highest positive int and -2147483648 the lowest negative one, but if you give
972     * this a number between 2147483647 and {@code 2147483647 + 2147483648}, it will interpret it as a negative number
973     * that fits in bounds using the normal rules for converting between signed and unsigned numbers.
974     * <br>
975     * Should be fairly close to the JDK's Integer.parseInt method, but this also supports CharSequence data instead of
976     * just String data, and allows specifying a start and end. This doesn't throw on invalid input, either, instead
977     * returning 0 if the first char is not a decimal digit, or stopping the parse process early if a non-decimal-digit
978     * char is read before end is reached. If the parse is stopped early, this behaves as you would expect for a number
979     * with less digits, and simply doesn't fill the larger places.
980     * @param cs a CharSequence, such as a String, containing only digits 0-9 with an optional sign
981     * @param start the (inclusive) first character position in cs to read
982     * @param end the (exclusive) last character position in cs to read (this stops after 10 or 11 characters if end is too large, depending on sign)
983     * @return the int that cs represents
984     */
985    public static int intFromDec(final CharSequence cs, final int start, int end)
986    {
987        int len, h, lim = 10;
988        if(cs == null || start < 0 || end <=0 || end - start <= 0
989                || (len = cs.length()) - start <= 0 || end > len)
990            return 0;
991        char c = cs.charAt(start);
992        if(c == '-')
993        {
994            len = -1;
995            lim = 11;
996            h = 0;
997        }
998        else if(c == '+')
999        {
1000            len = 1;
1001            lim = 11;
1002            h = 0;
1003        }
1004        else if(c > 102 || (h = hexCodes[c]) < 0 || h > 9)
1005            return 0;
1006        else
1007        {
1008            len = 1;
1009        }
1010        int data = h;
1011        for (int i = start + 1; i < end && i < start + lim; i++) {
1012            if((c = cs.charAt(i)) > 102 || (h = hexCodes[c]) < 0 || h > 9)
1013                return data * len;
1014            data = data * 10 + h;
1015        }
1016        return data * len;
1017    }
1018    /**
1019     * Reads in a CharSequence containing only binary digits (only 0 and 1) and returns the long they represent,
1020     * reading at most 64 characters and returning the result if valid or 0 otherwise. The first digit is considered
1021     * the sign bit iff cs is 64 chars long.
1022     * <br>
1023     * Should be fairly close to Java 8's Long.parseUnsignedLong method, which is a bizarre omission from earlier JDKs.
1024     * This doesn't throw on invalid input, though, instead returning 0.
1025     * @param cs a CharSequence, such as a String, containing only binary digits (nothing at the start)
1026     * @return the long that cs represents
1027     */
1028    public static long longFromBin(CharSequence cs)
1029    {
1030        return longFromBin(cs, 0, cs.length());
1031    }
1032
1033    /**
1034     * Reads in a CharSequence containing only binary digits (only 0 and 1) and returns the long they represent,
1035     * reading at most 64 characters and returning the result if valid or 0 otherwise. The first digit is considered
1036     * the sign bit iff cs is 64 chars long.
1037     * <br>
1038     * Should be fairly close to Java 8's Long.parseUnsignedLong method, which is a bizarre omission from earlier JDKs.
1039     * This doesn't throw on invalid input, though, instead returning 0.
1040     * @param cs a CharSequence, such as a String, containing only binary digits (nothing at the start)
     * @param start the (inclusive) first character position in cs to read from
     * @param end the (exclusive) position in cs at which to stop reading (this stops after 64 characters if end is too large)
1043     * @return the long that cs represents
1044     */
1045    public static long longFromBin(CharSequence cs, final int start, final int end)
1046    {
1047        int len;
1048        if(cs == null || start < 0 || end <=0 || end - start <= 0
1049                || (len = cs.length()) - start <= 0 || end > len)
1050            return 0;
1051        char c = cs.charAt(start);
1052        if(c < '0' || c > '1')
1053            return 0;
1054        long data = hexCodes[c];
1055        for (int i = start+1; i < end && i < start+64; i++) {
1056            if((c = cs.charAt(i)) < '0' || c > '1')
1057                return 0;
1058            data <<= 1;
1059            data |= c - '0';
1060        }
1061        return data;
1062    }
1063    /**
1064     * Reads in a CharSequence containing only binary digits (only 0 and 1) and returns the int they represent,
1065     * reading at most 32 characters and returning the result if valid or 0 otherwise. The first digit is considered
1066     * the sign bit iff cs is 32 chars long.
1067     * <br>
1068     * Should be fairly close to Java 8's Integer.parseUnsignedInt method, which is a bizarre omission from earlier
1069     * JDKs. This doesn't throw on invalid input, though, instead returning 0.
1070     * @param cs a CharSequence, such as a String, containing only binary digits (nothing at the start)
1071     * @return the int that cs represents
1072     */
1073    public static int intFromBin(CharSequence cs)
1074    {
1075        return intFromBin(cs, 0, cs.length());
1076    }
1077
1078    /**
1079     * Reads in a CharSequence containing only binary digits (only 0 and 1) and returns the int they represent,
1080     * reading at most 32 characters and returning the result if valid or 0 otherwise. The first digit is considered
1081     * the sign bit iff cs is 32 chars long.
1082     * <br>
1083     * Should be fairly close to Java 8's Integer.parseUnsignedInt method, which is a bizarre omission from earlier
1084     * JDKs. This doesn't throw on invalid input, though, instead returning 0.
1085     * @param cs a CharSequence, such as a String, containing only binary digits (nothing at the start)
     * @param start the (inclusive) first character position in cs to read from
     * @param end the (exclusive) position in cs at which to stop reading (this stops after 32 characters if end is too large)
1088     * @return the int that cs represents
1089     */
1090    public static int intFromBin(CharSequence cs, final int start, final int end)
1091    {
1092        int len;
1093        if(cs == null || start < 0 || end <=0 || end - start <= 0
1094                || (len = cs.length()) - start <= 0 || end > len)
1095            return 0;
1096        char c = cs.charAt(start);
1097        if(c < '0' || c > '1')
1098            return 0;
1099        int data = hexCodes[c];
1100        for (int i = start+1; i < end && i < start+32; i++) {
1101            if((c = cs.charAt(i)) < '0' || c > '1')
1102                return 0;
1103            data <<= 1;
1104            data |= c - '0';
1105        }
1106        return data;
1107    }
1108
1109    /**
1110     * Base-64 encodes number and stores that string representation in buf starting at offset; uses 11 chars.
1111     *
1112     * @param number the long to encode
1113     * @param offset the first position to set in buf
1114     * @param buf    a char array that should be non-null and have length of at least offset + 11
1115     * @return buf, after modifying it in-place
1116     */
1117    public static char[] b64Encode(long number, int offset, char[] buf) {
1118        if (buf != null && buf.length >= 11 - offset) {
1119            buf[offset] = keyBase64[(int) (number >>> 60)];
1120            buf[offset + 1] = keyBase64[(int) (0x3f & number >>> 54)];
1121            buf[offset + 2] = keyBase64[(int) (0x3f & number >>> 48)];
1122            buf[offset + 3] = keyBase64[(int) (0x3f & number >>> 42)];
1123            buf[offset + 4] = keyBase64[(int) (0x3f & number >>> 36)];
1124            buf[offset + 5] = keyBase64[(int) (0x3f & number >>> 30)];
1125            buf[offset + 6] = keyBase64[(int) (0x3f & number >>> 24)];
1126            buf[offset + 7] = keyBase64[(int) (0x3f & number >>> 18)];
1127            buf[offset + 8] = keyBase64[(int) (0x3f & number >>> 12)];
1128            buf[offset + 9] = keyBase64[(int) (0x3f & number >>> 6)];
1129            buf[offset + 10] = keyBase64[(int) (0x3f & number)];
1130        }
1131        return buf;
1132    }
1133
1134
1135    /**
1136     * Base-64 encodes number and stores that string representation in buf starting at offset; uses 11 chars.
1137     *
1138     * @param number the double to encode
1139     * @param offset the first position to set in buf
1140     * @param buf    a char array that should be non-null and have length of at least offset + 11
1141     * @return buf, after modifying it in-place
1142     */
1143    public static char[] b64Encode(double number, int offset, char[] buf) {
1144        return b64Encode(NumberTools.doubleToLongBits(number), offset, buf);
1145    }
1146
1147    /**
1148     * Base-64 encodes number and stores that string representation in buf starting at offset; uses 6 chars.
1149     *
1150     * @param number the int to encode
1151     * @param offset the first position to set in buf
1152     * @param buf    a char array that should be non-null and have length of at least offset + 6
1153     * @return buf, after modifying it in-place
1154     */
1155    public static char[] b64Encode(int number, int offset, char[] buf) {
1156        if (buf != null && buf.length >= 6 - offset) {
1157            buf[offset] = keyBase64[number >>> 30];
1158            buf[offset + 1] = keyBase64[0x3f & number >>> 24];
1159            buf[offset + 2] = keyBase64[0x3f & number >>> 18];
1160            buf[offset + 3] = keyBase64[0x3f & number >>> 12];
1161            buf[offset + 4] = keyBase64[0x3f & number >>> 6];
1162            buf[offset + 5] = keyBase64[0x3f & number];
1163        }
1164        return buf;
1165    }
1166
1167    /**
1168     * Base-64 encodes number and stores that string representation in buf starting at offset; uses 6 chars.
1169     *
1170     * @param number the float to encode
1171     * @param offset the first position to set in buf
1172     * @param buf    a char array that should be non-null and have length of at least offset + 6
1173     * @return buf, after modifying it in-place
1174     */
1175    public static char[] b64Encode(float number, int offset, char[] buf) {
1176        return b64Encode(NumberTools.floatToIntBits(number), offset, buf);
1177    }
1178
1179    /**
1180     * Base-64 encodes number and stores that string representation in buf starting at offset; uses 3 chars.
1181     *
1182     * @param number the int to encode
1183     * @param offset the first position to set in buf
1184     * @param buf    a char array that should be non-null and have length of at least offset + 3
1185     * @return buf, after modifying it in-place
1186     */
1187    public static char[] b64Encode(short number, int offset, char[] buf) {
1188        if (buf != null && buf.length >= 3 - offset) {
1189            buf[offset] = keyBase64[number >>> 12];
1190            buf[offset + 1] = keyBase64[0x3f & number >>> 6];
1191            buf[offset + 2] = keyBase64[0x3f & number];
1192        }
1193        return buf;
1194    }
1195
1196    /**
1197     * Base-64 encodes glyph and stores that string representation in buf starting at offset; uses 3 chars.
1198     *
1199     * @param glyph  the char to encode
1200     * @param offset the first position to set in buf
1201     * @param buf    a char array that should be non-null and have length of at least offset + 3
1202     * @return buf, after modifying it in-place
1203     */
1204    public static char[] b64Encode(char glyph, int offset, char[] buf) {
1205        if (buf != null && buf.length >= 4 - offset) {
1206            buf[offset] = keyBase64[glyph >>> 12];
1207            buf[offset + 1] = keyBase64[0x3f & glyph >>> 6];
1208            buf[offset + 2] = keyBase64[0x3f & glyph];
1209        }
1210        return buf;
1211    }
1212
1213    /**
1214     * Base-64 encodes number and stores that string representation in buf starting at offset; uses 2 chars.
1215     *
1216     * @param number the byte to encode
1217     * @param offset the first position to set in buf
1218     * @param buf    a char array that should be non-null and have length of at least offset + 2
1219     * @return buf, after modifying it in-place
1220     */
1221    public static char[] b64Encode(byte number, int offset, char[] buf) {
1222        if (buf != null && buf.length >= 2 - offset) {
1223            buf[offset] = keyBase64[number >>> 6];
1224            buf[offset + 1] = keyBase64[0x3f & number];
1225        }
1226        return buf;
1227
1228    }
1229
1230    /**
1231     * Decodes 11 characters from data starting from offset to get a long encoded as base-64.
1232     * @param data a char array that should be have length of at least offset + 11
1233     * @param offset where in data to start reading from
1234     * @return the decoded long
1235     */
1236    public static long b64DecodeLong(char[] data, int offset) {
1237        return (data == null || data.length < 11 + offset) ? 0L :
1238                (((long)valBase64[data[offset] & 0x7F]) << 60)
1239                        | ((0x3fL & valBase64[data[offset + 1 ] & 0x7F]) << 54)
1240                        | ((0x3fL & valBase64[data[offset + 2 ] & 0x7F]) << 48)
1241                        | ((0x3fL & valBase64[data[offset + 3 ] & 0x7F]) << 42)
1242                        | ((0x3fL & valBase64[data[offset + 4 ] & 0x7F]) << 36)
1243                        | ((0x3fL & valBase64[data[offset + 5 ] & 0x7F]) << 30)
1244                        | ((0x3fL & valBase64[data[offset + 6 ] & 0x7F]) << 24)
1245                        | ((0x3fL & valBase64[data[offset + 7 ] & 0x7F]) << 18)
1246                        | ((0x3fL & valBase64[data[offset + 8 ] & 0x7F]) << 12)
1247                        | ((0x3fL & valBase64[data[offset + 9 ] & 0x7F]) << 6)
1248                        | (0x3fL  & valBase64[data[offset + 10] & 0x7F]);
1249    }
1250
1251    /**
1252     * Decodes 11 characters from data starting from offset to get a double encoded as base-64.
1253     * @param data a char array that should be have length of at least offset + 11
1254     * @param offset where in data to start reading from
1255     * @return the decoded double
1256     */
1257    public static double b64DecodeDouble(char[] data, int offset) {
1258        return (data == null || data.length < 11 + offset) ? 0.0 :
1259                NumberTools.longBitsToDouble((((long) valBase64[data[offset] & 0x7F]) << 60)
1260                        | ((0x3fL & valBase64[data[offset + 1 ] & 0x7F]) << 54)
1261                        | ((0x3fL & valBase64[data[offset + 2 ] & 0x7F]) << 48)
1262                        | ((0x3fL & valBase64[data[offset + 3 ] & 0x7F]) << 42)
1263                        | ((0x3fL & valBase64[data[offset + 4 ] & 0x7F]) << 36)
1264                        | ((0x3fL & valBase64[data[offset + 5 ] & 0x7F]) << 30)
1265                        | ((0x3fL & valBase64[data[offset + 6 ] & 0x7F]) << 24)
1266                        | ((0x3fL & valBase64[data[offset + 7 ] & 0x7F]) << 18)
1267                        | ((0x3fL & valBase64[data[offset + 8 ] & 0x7F]) << 12)
1268                        | ((0x3fL & valBase64[data[offset + 9 ] & 0x7F]) << 6)
1269                        | (0x3fL  & valBase64[data[offset + 10] & 0x7F]));
1270    }
1271
1272    /**
1273     * Decodes 6 characters from data starting from offset to get an int encoded as base-64.
1274     * @param data a char array that should be have length of at least offset + 6
1275     * @param offset where in data to start reading from
1276     * @return the decoded int
1277     */
1278    public static int b64DecodeInt(char[] data, int offset) {
1279        return (data == null || data.length < 6 + offset) ? 0 :
1280                ((valBase64[data[offset] & 0x7F]) << 30)
1281                        | ((valBase64[data[offset + 1] & 0x7F]) << 24)
1282                        | ((valBase64[data[offset + 2] & 0x7F]) << 18)
1283                        | ((valBase64[data[offset + 3] & 0x7F]) << 12)
1284                        | ((valBase64[data[offset + 4] & 0x7F]) << 6)
1285                        | (valBase64[data[offset + 5] & 0x7F]);
1286    }
1287
1288    /**
1289     * Decodes 6 characters from data starting from offset to get a float encoded as base-64.
1290     * @param data a char array that should be have length of at least offset + 6
1291     * @param offset where in data to start reading from
1292     * @return the decoded float
1293     */
1294    public static float b64DecodeFloat(char[] data, int offset) {
1295        return (data == null || data.length < 6 + offset) ? 0f :
1296                NumberTools.intBitsToFloat(((valBase64[data[offset] & 0x7F]) << 30)
1297                        | ((valBase64[data[offset + 1] & 0x7F]) << 24)
1298                        | ((valBase64[data[offset + 2] & 0x7F]) << 18)
1299                        | ((valBase64[data[offset + 3] & 0x7F]) << 12)
1300                        | ((valBase64[data[offset + 4] & 0x7F]) << 6)
1301                        | ( valBase64[data[offset + 5] & 0x7F]));
1302    }
1303
1304    /**
1305     * Decodes 3 characters from data starting from offset to get a short encoded as base-64.
1306     * @param data a char array that should be have length of at least offset + 3
1307     * @param offset where in data to start reading from
1308     * @return the decoded short
1309     */
1310    public static short b64DecodeShort(char[] data, int offset) {
1311        return (short) ((data == null || data.length < 3 + offset) ? 0 :
1312                ((valBase64[data[offset] & 0x7F]) << 12)
1313                        | ((valBase64[data[offset + 1] & 0x7F]) << 6)
1314                        | ( valBase64[data[offset + 2] & 0x7F]));
1315    }
1316    /**
1317     * Decodes 3 characters from data starting from offset to get a char encoded as base-64.
1318     * @param data a char array that should be have length of at least offset + 3
1319     * @param offset where in data to start reading from
1320     * @return the decoded char
1321     */
1322    public static char b64DecodeChar(char[] data, int offset) {
1323        return (char) ((data == null || data.length < 3 + offset) ? 0 :
1324                ((valBase64[data[offset] & 0x7F]) << 12)
1325                        | ((valBase64[data[offset + 1] & 0x7F]) << 6)
1326                        | ( valBase64[data[offset + 2] & 0x7F]));
1327    }
1328
1329    /**
1330     * Decodes 2 characters from data starting from offset to get a byte encoded as base-64.
1331     * @param data a char array that should be have length of at least offset + 2
1332     * @param offset where in data to start reading from
1333     * @return the decoded byte
1334     */
1335    public static byte b64DecodeByte(char[] data, int offset) {
1336        return (byte) ((data == null || data.length < 2 + offset) ? 0 :
1337                ((valBase64[data[offset] & 0x7F]) << 6)
1338                        | (valBase64[data[offset + 1] & 0x7F]));
1339    }
1340
    /**
     * Hashes the given booleans with {@code CrossHash.hash64(array)} and returns what {@code hex} produces for
     * that hash.
     * @param array the boolean items to hash, as an array or varargs
     * @return the result of {@code hex} applied to the hash; presumably hexadecimal digits (confirm against hex)
     */
    public static String hexHash(boolean... array) {
        return hex(CrossHash.hash64(array));
    }
1344
    /**
     * Hashes the given bytes with {@code CrossHash.hash64(array)} and returns what {@code hex} produces for that
     * hash.
     * @param array the byte items to hash, as an array or varargs
     * @return the result of {@code hex} applied to the hash; presumably hexadecimal digits (confirm against hex)
     */
    public static String hexHash(byte... array) {
        return hex(CrossHash.hash64(array));
    }
1348
    /**
     * Hashes the given shorts with {@code CrossHash.hash64(array)} and returns what {@code hex} produces for that
     * hash.
     * @param array the short items to hash, as an array or varargs
     * @return the result of {@code hex} applied to the hash; presumably hexadecimal digits (confirm against hex)
     */
    public static String hexHash(short... array) {
        return hex(CrossHash.hash64(array));
    }
1352
    /**
     * Hashes the given chars with {@code CrossHash.hash64(array)} and returns what {@code hex} produces for that
     * hash.
     * @param array the char items to hash, as an array or varargs
     * @return the result of {@code hex} applied to the hash; presumably hexadecimal digits (confirm against hex)
     */
    public static String hexHash(char... array) {
        return hex(CrossHash.hash64(array));
    }
1356
    /**
     * Hashes the given ints with {@code CrossHash.hash64(array)} and returns what {@code hex} produces for that
     * hash.
     * @param array the int items to hash, as an array or varargs
     * @return the result of {@code hex} applied to the hash; presumably hexadecimal digits (confirm against hex)
     */
    public static String hexHash(int... array) {
        return hex(CrossHash.hash64(array));
    }
1360
    /**
     * Hashes the given longs with {@code CrossHash.hash64(array)} and returns what {@code hex} produces for that
     * hash.
     * @param array the long items to hash, as an array or varargs
     * @return the result of {@code hex} applied to the hash; presumably hexadecimal digits (confirm against hex)
     */
    public static String hexHash(long... array) {
        return hex(CrossHash.hash64(array));
    }
1364
1365    /**
1366     * If text is shorter than the given minimumLength, returns a String with text padded on the right with spaces until
1367     * it reaches that length; otherwise it simply returns text.
1368     * @param text the text to pad if necessary
1369     * @param minimumLength the minimum length of String to return
1370     * @return text, potentially padded with spaces to reach the given minimum length
1371     */
1372    public static String padRight(String text, int minimumLength)
1373    {
1374        if(text.length() < minimumLength)
1375            return padRightStrict(text, ' ', minimumLength);
1376        return text;
1377    }
1378
1379    /**
1380     * If text is shorter than the given minimumLength, returns a String with text padded on the right with padChar
1381     * until it reaches that length; otherwise it simply returns text.
1382     * @param text the text to pad if necessary
1383     * @param padChar the char to use to pad text, if necessary
1384     * @param minimumLength the minimum length of String to return
1385     * @return text, potentially padded with padChar to reach the given minimum length
1386     */
1387    public static String padRight(String text, char padChar, int minimumLength)
1388    {
1389        if(text.length() < minimumLength)
1390            return padRightStrict(text, padChar, minimumLength);
1391        return text;
1392    }
1393
1394    /**
1395     * Constructs a String with exactly the given totalLength by taking text (or a substring of it) and padding it on
1396     * its right side with spaces until totalLength is reached. If text is longer than totalLength, this only uses the
1397     * portion of text needed to fill totalLength, and no more.
1398     * @param text the String to pad if necessary, or truncate if too long
1399     * @param totalLength the exact length of String to return
1400     * @return a String with exactly totalLength for its length, made from text and possibly extra spaces
1401     */
1402    public static String padRightStrict(String text, int totalLength) {
1403        return padRightStrict(text, ' ', totalLength);
1404    }
1405
1406    /**
1407     * Constructs a String with exactly the given totalLength by taking text (or a substring of it) and padding it on
1408     * its right side with padChar until totalLength is reached. If text is longer than totalLength, this only uses the
1409     * portion of text needed to fill totalLength, and no more.
1410     * @param text the String to pad if necessary, or truncate if too long
1411     * @param padChar the char to use to fill any remaining length
1412     * @param totalLength the exact length of String to return
1413     * @return a String with exactly totalLength for its length, made from text and possibly padChar
1414     */
1415    public static String padRightStrict(String text, char padChar, int totalLength) {
1416        char[] c = new char[totalLength];
1417        int len = text.length();
1418        text.getChars(0, Math.min(len, totalLength), c, 0);
1419        for (int i = len; i < totalLength; i++) {
1420            c[i] = padChar;
1421        }
1422        return String.valueOf(c);
1423    }
1424
1425    /**
1426     * If text is shorter than the given minimumLength, returns a String with text padded on the left with spaces until
1427     * it reaches that length; otherwise it simply returns text.
1428     * @param text the text to pad if necessary
1429     * @param minimumLength the minimum length of String to return
1430     * @return text, potentially padded with spaces to reach the given minimum length
1431     */
1432    public static String padLeft(String text, int minimumLength)
1433    {
1434        if(text.length() < minimumLength)
1435            return padLeftStrict(text, ' ', minimumLength);
1436        return text;
1437    }
1438    /**
1439     * If text is shorter than the given minimumLength, returns a String with text padded on the left with padChar until
1440     * it reaches that length; otherwise it simply returns text.
1441     * @param text the text to pad if necessary
1442     * @param padChar the char to use to pad text, if necessary
1443     * @param minimumLength the minimum length of String to return
1444     * @return text, potentially padded with padChar to reach the given minimum length
1445     */
1446    public static String padLeft(String text, char padChar, int minimumLength)
1447    {
1448        if(text.length() < minimumLength)
1449            return padLeftStrict(text, padChar, minimumLength);
1450        return text;
1451    }
1452
1453    /**
1454     * Constructs a String with exactly the given totalLength by taking text (or a substring of it) and padding it on
1455     * its left side with spaces until totalLength is reached. If text is longer than totalLength, this only uses the
1456     * portion of text needed to fill totalLength, and no more.
1457     * @param text the String to pad if necessary, or truncate if too long
1458     * @param totalLength the exact length of String to return
1459     * @return a String with exactly totalLength for its length, made from text and possibly extra spaces
1460     */
1461    public static String padLeftStrict(String text, int totalLength) {
1462        return padLeftStrict(text, ' ', totalLength);
1463    }
1464
1465    /**
1466     * Constructs a String with exactly the given totalLength by taking text (or a substring of it) and padding it on
1467     * its left side with padChar until totalLength is reached. If text is longer than totalLength, this only uses the
1468     * portion of text needed to fill totalLength, and no more.
1469     * @param text the String to pad if necessary, or truncate if too long
1470     * @param padChar the char to use to fill any remaining length
1471     * @param totalLength the exact length of String to return
1472     * @return a String with exactly totalLength for its length, made from text and possibly padChar
1473     */
1474    public static String padLeftStrict(String text, char padChar, int totalLength) {
1475        char[] c = new char[totalLength];
1476        int len = text.length();
1477        text.getChars(0, Math.min(len, totalLength), c, Math.max(0, totalLength - len));
1478        for (int i = totalLength - len - 1; i >= 0; i--) {
1479            c[i] = padChar;
1480        }
1481        return String.valueOf(c);
1482    }
1483
1484    /**
1485     * Word-wraps the given String (or other CharSequence, such as a StringBuilder) so it is split into zero or more
1486     * Strings as lines of text, with the given width as the maximum width for a line. This correctly splits most (all?)
1487     * text in European languages on spaces (treating all whitespace characters matched by the regex '\\s' as breaking),
1488     * and also uses the English-language rule (probably used in other languages as well) of splitting on hyphens and
1489     * other dash characters (Unicode category Pd) in the middle of a word. This means for a phrase like "UN Secretary
1490     * General Ban-Ki Moon", if the width was 12, then the Strings in the List returned would be
1491     * <br>
1492     * <pre>
1493     * "UN Secretary"
1494     * "General Ban-"
1495     * "Ki Moon"
1496     * </pre>
1497     * Spaces are not preserved if they were used to split something into two lines, but dashes are.
1498     * @param longText a probably-large piece of text that needs to be split into multiple lines with a max width
1499     * @param width the max width to use for any line, removing trailing whitespace at the end of a line
1500     * @return a List of Strings for the lines after word-wrapping
1501     */
1502    public static List<String> wrap(CharSequence longText, int width)
1503    {
1504        if(width <= 0)
1505            return new ArrayList<>(0);
1506        return wrap(new ArrayList<String>(longText.length() / width + 2), longText, width);
1507    }
1508    /**
1509     * Word-wraps the given String (or other CharSequence, such as a StringBuilder) so it is split into zero or more
1510     * Strings as lines of text, with the given width as the maximum width for a line; appends the word-wrapped lines to
1511     * the given List of Strings and does not create a new List. This correctly splits most (all?) text in European
1512     * languages on spaces (treating all whitespace characters matched by the regex '\\s' as breaking), and also uses
1513     * the English-language rule (probably used in other languages as well) of splitting on hyphens and other dash
1514     * characters (Unicode category Pd) in the middle of a word. This means for a phrase like "UN Secretary General
1515     * Ban-Ki Moon", if the width was 12, then the Strings in the List returned would be
1516     * <br>
1517     * <pre>
1518     * "UN Secretary"
1519     * "General Ban-"
1520     * "Ki Moon"
1521     * </pre>
1522     * Spaces are not preserved if they were used to split something into two lines, but dashes are.
1523     * @param receiving the List of String to append the word-wrapped lines to
1524     * @param longText a probably-large piece of text that needs to be split into multiple lines with a max width
1525     * @param width the max width to use for any line, removing trailing whitespace at the end of a line
1526     * @return the given {@code receiving} parameter, after appending the lines from word-wrapping
1527     */
1528    public static List<String> wrap(List<String> receiving, CharSequence longText, int width)
1529    {
1530        if(width <= 0 || receiving == null)
1531            return receiving;
1532        Matcher widthMatcher = Pattern.compile("(?:({=Y}(?!\\s).{1," + width + "})((?<=\\p{Pd})|(\\s+)))|({=Y}\\S{1," + width + "})").matcher(longText + "\n");
1533        while (widthMatcher.find())
1534        {
1535            receiving.add(widthMatcher.group("Y"));
1536        }
1537        return receiving;
1538    }
1539
1540    public static String replace(CharSequence text, String before, String after) {
1541        if(text instanceof String)
1542        {
1543            return ((String)text).replace(before, after);
1544        }
1545        String t = text.toString();
1546        return t.replace(before, after);
1547    }
1548
    /**
     * Precompiled patterns: {@code whitespacePattern} matches a run of one or more whitespace chars (regex "\\s+"),
     * and {@code nonSpacePattern} matches a run of one or more non-whitespace chars (regex "\\S+").
     */
    public static final Pattern whitespacePattern = Pattern.compile("\\s+"),
            nonSpacePattern = Pattern.compile("\\S+");
    // A single Matcher reused by the indexOf overloads, which replace its pattern and target on every call.
    // NOTE(review): because this is shared mutable state, concurrent indexOf calls presumably interfere -- confirm.
    private static final Matcher matcher = new Matcher(whitespacePattern);
1552    public static int indexOf(CharSequence text, Pattern regex, int beginIndex)
1553    {
1554        matcher.setPattern(regex);
1555        matcher.setTarget(text);
1556        matcher.setPosition(beginIndex);
1557        if(!matcher.find())
1558            return -1;
1559        return matcher.start();
1560    }
1561    public static int indexOf(CharSequence text, String regex, int beginIndex)
1562    {
1563        matcher.setPattern(Pattern.compile(regex));
1564        matcher.setTarget(text);
1565        matcher.setPosition(beginIndex);
1566        if(!matcher.find())
1567            return -1;
1568        return matcher.start();
1569    }
1570    public static int indexOf(CharSequence text, Pattern regex)
1571    {
1572        matcher.setPattern(regex);
1573        matcher.setTarget(text);
1574        if(!matcher.find())
1575            return -1;
1576        return matcher.start();
1577    }
1578    public static int indexOf(CharSequence text, String regex)
1579    {
1580        matcher.setPattern(Pattern.compile(regex));
1581        matcher.setTarget(text);
1582        if(!matcher.find())
1583            return -1;
1584        return matcher.start();
1585    }
    // Matcher used by capitalize(): group 1 is a letter that is not preceded by another letter (the start of a word),
    // group 2 is any letters right after it (the rest of the word), and group 3 is any non-letters that follow.
    private static final Matcher capitalizeMatcher = Pattern.compile("(?<!\\pL)(\\pL)(\\pL*)(\\PL*)").matcher();
    // Scratch buffer shared by capitalize() and sentenceCase(); each of them resets its length to 0 before use.
    private static final StringBuilder sb = new StringBuilder(64);
1588
1589    /**
1590     * Capitalizes Each Word In The Parameter {@code original}, Returning A New String.
1591     * @param original a CharSequence, such as a StringBuilder or String, which could have CrAzY capitalization
1592     * @return A String With Each Word Capitalized At The Start And The Rest In Lower Case 
1593     */
1594    public static String capitalize(final CharSequence original) {
1595        if (original == null || original.length() <= 0) {
1596            return "";
1597        }
1598        sb.setLength(0);
1599        capitalizeMatcher.setTarget(original);
1600        while (capitalizeMatcher.find()) {
1601            sb.append(capitalizeMatcher.group(1).toUpperCase());
1602            capitalizeMatcher.getGroup(2, sb, 1); // mode 1 is case-insensitive, which lower-cases result
1603            capitalizeMatcher.getGroup(3, sb);
1604        }
1605        return sb.toString();
1606    }
    // Matcher used by sentenceCase() to find one sentence at a time; a sentence ends at a run of '.', '?', or '!',
    // or at the end of the input.
    private static final Matcher sentenceMatcher = Pattern.compile("(\\PL*)((\\pL)([^.?!]*)($|[.?!]+))(\\PL*)").matcher();
    // group 1 before letters, group 2 whole sentence, group 3 first letter, group 4 rest of sentence, group 5 closing punctuation, group 6 remainder of non-letters 
1609
1610    /**
1611     * Attempts to scan for sentences in {@code original}, capitalizes the first letter of each sentence, and otherwise
1612     * leaves the CharSequence untouched as it returns it as a String. Sentences are detected with a crude heuristic of
1613     * "does it have periods, exclamation marks, or question marks at the end, or does it reach the end of input? If
1614     * yes, it's a sentence."
1615     * @param original a CharSequence that is expected to contain sentence-like data that needs capitalization; existing upper-case letters will stay upper-case.
1616     * @return a String where the first letter of each sentence (detected as best this can) is capitalized.
1617     */
1618    public static String sentenceCase(final CharSequence original) {
1619        if (original == null || original.length() <= 0) {
1620            return "";
1621        }
1622        sb.setLength(0);
1623        sentenceMatcher.setTarget(original);
1624        while (sentenceMatcher.find()) {
1625            sentenceMatcher.getGroup(1, sb);
1626            sb.append(sentenceMatcher.group(3).toUpperCase());
1627            sentenceMatcher.getGroup(4, sb); // use getGroup(4, sb, 1) if this should lower-case the rest
1628            sentenceMatcher.getGroup(5, sb);
1629            sentenceMatcher.getGroup(6, sb);
1630        }
1631        return sb.toString();
1632    }
    // Replacer used by correctABeforeVowel(). Group 1 captures an 'A' or 'a' at a word boundary and group 2 captures
    // the separator chars after it, which must be followed (via lookahead) by FakeLanguageGen.anyVowel; the
    // replacement "$1n$2" re-emits both groups with an 'n' inserted between them.
    // NOTE(review): \p{G} is presumably a Regexodus-specific category covering the whitespace gap -- confirm.
    private static final Replacer anReplacer = new Replacer(Pattern.compile("\\b([Aa])(\\p{G}+)(?="+FakeLanguageGen.anyVowel+")", Pattern.IGNORE_CASE | Pattern.UNICODE), "$1n$2");
1634
    /**
     * A simple method that looks for any occurrences of the word 'a' followed by some non-zero amount of whitespace and
     * then any vowel starting the following word (such as 'a item'), then replaces each such improper 'a' with 'an'
     * (such as 'an item'). The regex used here isn't bulletproof, but it should be fairly robust, handling when you
     * have multiple whitespace chars, different whitespace chars (like carriage return and newline), accented vowels in
     * the following word (but not in the initial 'a', which is expected to use English spelling rules), and the case of
     * the initial 'a' or 'A'. The inserted letter is always a lower-case 'n', so an upper-case 'A' becomes 'An'.
     * <br>
     * This only looks at spelling, not pronunciation, so it cannot tell when a word that starts with a vowel letter
     * should still take 'a'.
     * <br>
     * Gotta love Regexodus; this is a two-liner that uses features specific to that regular expression library.
     * @param text the (probably generated English) multi-word text to search for 'a' in and possibly replace with 'an'
     * @return a new String with every improper 'a' replaced
     */
    public static String correctABeforeVowel(final CharSequence text){
        // All of the work is done by the precompiled anReplacer and its "$1n$2" substitution.
        return anReplacer.replace(text);
    }
1650
    /**
     * Constant storing the 16 hexadecimal digits, as char values, in order. The char at index {@code i} is the digit
     * for the value {@code i}; the digits for 10 through 15 are the upper-case letters 'A' through 'F'.
     */
    public static final char[] hexDigits = {
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };
1657
1658    /**
1659     * A constant containing only chars that are reasonably likely to be supported by broad fonts and thus display-able.
1660     * This assumes the font supports Latin, Greek, and Cyrillic alphabets, with good support for extended Latin (at
1661     * least for European languages) but not required to be complete enough to support the very large Vietnamese set of
1662     * extensions to Latin, nor to support any International Phonetic Alphabet (IPA) chars. It also assumes box drawing
1663     * characters are supported and a handful of common dingbats, such as male and female signs. It does not include
1664     * the tab, newline, or carriage return characters, since these don't usually make sense on a grid of chars.
1665     */
1666    public static final String PERMISSIBLE_CHARS =
1667            " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmno"+
1668            "pqrstuvwxyz{|}~¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàá"+
1669            "âãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİı"+
1670            "ĴĵĶķĹĺĻļĽľĿŀŁłŃńŅņŇňŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƒǺǻǼǽǾǿ"+
1671            "ȘșȚțȷˆˇˉˋ˘˙˚˛˜˝΄΅Ά·ΈΉΊΌΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυ"+
1672            "φχψωϊϋόύώЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхц"+
1673            "чшщъыьэюяѐёђѓєѕіїјљњћќѝўџѴѵҐґẀẁẂẃẄẅỲỳ–—‘’‚‛“”„†‡•…‰‹›ⁿ₤€№™Ω℮←↑→↓∆−√≈" +
1674            "─│┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬■□▲▼○●◦♀♂♠♣♥♦♪";
1675
    /** The single-line box drawing chars: horizontal, vertical, four corners, four tee pieces, and the cross. */
    public static final String BOX_DRAWING_SINGLE = "─│┌┐└┘├┤┬┴┼";
    /** The double-line box drawing chars, in the same order as {@link #BOX_DRAWING_SINGLE}. */
    public static final String BOX_DRAWING_DOUBLE = "═║╔╗╚╝╠╣╦╩╬";
    /** The single-line box drawing chars followed by the double-line and mixed single/double-line ones. */
    public static final String BOX_DRAWING = "─│┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬";
    /** Arrows, squares, triangles, circles, the female and male signs, the four card suits, and a musical note. */
    public static final String VISUAL_SYMBOLS = "←↑→↓■□▲▼○●◦♀♂♠♣♥♦♪";
    /** The ten decimal digits, '0' through '9', in ascending order. */
    public static final String DIGITS = "0123456789";
    /** Stand-alone accent-like and quote-like marks, such as the tilde, caret, acute accent, and single quotes. */
    public static final String MARKS = "~`^'¨¯°´¸ˆˇˉˋ˘˙˚˛˜˝΄΅‘’‚‛";
    /**
     * Can be used to match an index with one in {@link #GROUPING_SIGNS_CLOSE} to find the closing char (this way only).
     * The reverse lookup is ambiguous because two different opening quotes here share one closing quote.
     */
    public static final String GROUPING_SIGNS_OPEN  = "([{<«‘‛“‹";
    /**
     * An index in {@link #GROUPING_SIGNS_OPEN} can be used here to find the closing char for that opening one.
     * This has the same length as GROUPING_SIGNS_OPEN, and repeats one closing single quote so the indices line up.
     */
    public static final String GROUPING_SIGNS_CLOSE = ")]}>»’’”›";
    /** Punctuation in everyday use, including the bullet, ellipsis, and two dashes of different lengths. */
    public static final String COMMON_PUNCTUATION = "!\"%&'*+,-./:;<>?•…–—";
    /** Symbols such as the at sign, backslash, underscore, copyright and trademark signs, and a few dingbats. */
    public static final String MODERN_PUNCTUATION = "@\\^_`|~¦©®™´№♀♂♪";
    /** Less frequently used punctuation, such as the section and paragraph signs and inverted '!' and '?'. */
    public static final String UNCOMMON_PUNCTUATION = "§¶¨ªº¯°·¸¡¿·‚„†‡";
    /** Mathematical and technical symbols, including superscripts, fractions, and operators. */
    public static final String TECHNICAL_PUNCTUATION = "#%'*+,-./<=>^|¬°µ±¹²³ⁿ¼½¾×÷‰№Ω℮∆−√≈";
    /**
     * All of the punctuation categories and both grouping-sign Strings, concatenated. Some chars belong to more than
     * one category (for example, '%' is in both COMMON_PUNCTUATION and TECHNICAL_PUNCTUATION), so this contains
     * repeated chars.
     */
    public static final String PUNCTUATION = COMMON_PUNCTUATION + MODERN_PUNCTUATION + UNCOMMON_PUNCTUATION +
            TECHNICAL_PUNCTUATION + GROUPING_SIGNS_OPEN + GROUPING_SIGNS_CLOSE;
    /** Currency signs, including the dollar, cent, pound, yen, and euro signs. */
    public static final String CURRENCY = "$¢£¤¥₤€";
    /** Spacing chars; this holds only the ordinary space. */
    public static final String SPACING = " ";
    /** The 26 upper-case letters of the English alphabet, in alphabetical order. */
    public static final String ENGLISH_LETTERS_UPPER =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    /** The 26 lower-case letters of the English alphabet, in alphabetical order. */
    public static final String ENGLISH_LETTERS_LOWER =
            "abcdefghijklmnopqrstuvwxyz";
    /** The upper-case English letters followed by the lower-case ones. */
    public static final String ENGLISH_LETTERS = ENGLISH_LETTERS_UPPER + ENGLISH_LETTERS_LOWER;

    /**
     * Upper-case accented and otherwise extended Latin letters. The last four chars, 'ß', 'S', 'F', and 'J', appear
     * to be stand-ins that keep this the same length as {@link #LATIN_EXTENDED_LETTERS_LOWER}, which ends with
     * chars that have no single upper-case char listed here.
     */
    public static final String LATIN_EXTENDED_LETTERS_UPPER =
            "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİĴĶĹĻĽĿŁŃŅŇŊŌŎŐŒŔŖŘŚŜŞŠŢŤŨŪŬŮŰŲŴŶŸŹŻŽǺǼǾȘȚẀẂẄỲßSFJ";
    /**
     * Lower-case accented and otherwise extended Latin letters, listed in the order that corresponds to
     * {@link #LATIN_EXTENDED_LETTERS_UPPER}.
     */
    public static final String LATIN_EXTENDED_LETTERS_LOWER =
            "àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıĵķĺļľŀłńņňŋōŏőœŕŗřśŝşšţťũūŭůűųŵŷÿźżžǻǽǿșțẁẃẅỳßſƒȷ";
    /** The upper-case extended Latin letters followed by the lower-case ones. */
    public static final String LATIN_EXTENDED_LETTERS = LATIN_EXTENDED_LETTERS_UPPER + LATIN_EXTENDED_LETTERS_LOWER;

    /** The upper-case English letters followed by the upper-case extended Latin letters. */
    public static final String LATIN_LETTERS_UPPER = ENGLISH_LETTERS_UPPER + LATIN_EXTENDED_LETTERS_UPPER;
    /** The lower-case English letters followed by the lower-case extended Latin letters. */
    public static final String LATIN_LETTERS_LOWER = ENGLISH_LETTERS_LOWER + LATIN_EXTENDED_LETTERS_LOWER;
    /** All upper-case Latin letters followed by all lower-case Latin letters. */
    public static final String LATIN_LETTERS = LATIN_LETTERS_UPPER + LATIN_LETTERS_LOWER;
1713
    /**
     * Includes the letter Sigma, 'Σ', twice because it has two lower-case forms in {@link #GREEK_LETTERS_LOWER}. This
     * lets you use one index for both lower and upper case, like with Latin and Cyrillic. For the same reason, the
     * last two chars repeat 'Ϊ' and 'Ϋ', lining up with the last two chars of the lower-case String.
     */
    public static final String GREEK_LETTERS_UPPER =
            "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏΪΫΪΫ";
    /**
     * Includes both lower-case forms for Sigma, 'ς' and 'σ', but this matches the two upper-case Sigma in
     * {@link #GREEK_LETTERS_UPPER}. This lets you use one index for both lower and upper case, like with Latin and
     * Cyrillic.
     */
    public static final String GREEK_LETTERS_LOWER =
            "αβγδεζηθικλμνξοπρςστυφχψωάέήίόύώϊϋΐΰ";

    /** The upper-case Greek letters followed by the lower-case ones. */
    public static final String GREEK_LETTERS = GREEK_LETTERS_UPPER + GREEK_LETTERS_LOWER;
1729
    /** Upper-case Cyrillic letters: the 32 letters from 'А' to 'Я' first, then additional letters such as 'Ё'. */
    public static final String CYRILLIC_LETTERS_UPPER =
            "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏѴҐ";
    /** Lower-case Cyrillic letters, listed in the order that corresponds to {@link #CYRILLIC_LETTERS_UPPER}. */
    public static final String CYRILLIC_LETTERS_LOWER =
            "абвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѵґ";
    /** The upper-case Cyrillic letters followed by the lower-case ones. */
    public static final String CYRILLIC_LETTERS = CYRILLIC_LETTERS_UPPER + CYRILLIC_LETTERS_LOWER;

    /** All upper-case letters: Latin, then Greek, then Cyrillic. */
    public static final String LETTERS_UPPER = LATIN_LETTERS_UPPER + GREEK_LETTERS_UPPER + CYRILLIC_LETTERS_UPPER;
    /** All lower-case letters: Latin, then Greek, then Cyrillic, in the order matching {@link #LETTERS_UPPER}. */
    public static final String LETTERS_LOWER = LATIN_LETTERS_LOWER + GREEK_LETTERS_LOWER + CYRILLIC_LETTERS_LOWER;
    /** All upper-case letters followed by all lower-case letters. */
    public static final String LETTERS = LETTERS_UPPER + LETTERS_LOWER;
    /** Everything in {@link #LETTERS} followed by the ten chars in {@link #DIGITS}. */
    public static final String LETTERS_AND_NUMBERS = LETTERS + DIGITS;
1740}