001package squidpony; 002 003import regexodus.Matcher; 004import regexodus.Pattern; 005import regexodus.Replacer; 006import squidpony.squidmath.CrossHash; 007import squidpony.squidmath.NumberTools; 008 009import java.util.ArrayList; 010import java.util.Collection; 011import java.util.Iterator; 012import java.util.List; 013 014/** 015 * Various utility functions for dealing with Strings, CharSequences, and char[]s; this has lots of methods to convert 016 * to and from Strings and numbers, but also has tools to wrap long CharSequences to fit in a maximum width, join arrays 017 * of various items into long Strings, split/search/count occurrences of literal char arrays or CharSequences without 018 * using any regex, and generally tidy up generated text. This last step includes padding left and right (including a 019 * "strict" option that truncates Strings that are longer than the padded size), Capitalizing Each Word, Capitalizing 020 * the first word in a sentence, replacing "a improper usage of a" with "an improved replacement using an," etc. This 021 * also has a lot of predefined categories of chars that are considered widely enough supported in fonts, like 022 * {@link #COMMON_PUNCTUATION} and {@link #LATIN_LETTERS_UPPER}. 023 * <br> 024 * Created by Tommy Ettinger on 3/21/2016. 025 */ 026public class StringKit { 027 /** 028 * Searches text for the exact contents of the char array search; returns true if text contains search. 
029 * @param text a CharSequence, such as a String or StringBuilder, that might contain search 030 * @param search a char array to try to find in text 031 * @return true if search was found 032 */ 033 public static boolean contains(CharSequence text, char[] search) { 034 return !(text == null || text.length() == 0 || search == null || search.length <= 0) 035 && containsPart(text, search, "", "") == search.length; 036 } 037 038 /** 039 * Tries to find as much of the char array {@code search} in the CharSequence {@code text}, always starting from the 040 * beginning of search (if the beginning isn't found, then it finds nothing), and returns the length of the found 041 * part of search (0 if not found). 042 * @param text a CharSequence to search in 043 * @param search a char array to look for 044 * @return the length of the searched-for char array that was found 045 */ 046 public static int containsPart(CharSequence text, char[] search) 047 { 048 return containsPart(text, search, "", ""); 049 } 050 051 /** 052 * Tries to find as much of the sequence {@code prefix search suffix} as it can in text, where prefix and suffix are 053 * CharSequences for some reason and search is a char array. Returns the length of the sequence it was able to 054 * match, up to {@code prefix.length() + search.length + suffix.length()}, or 0 if no part of the looked-for 055 * sequence could be found. 056 * <br> 057 * This is almost certainly too specific to be useful outside of a handful of cases. 
058 * @param text a CharSequence to search in 059 * @param search a char array to look for, surrounded by prefix and suffix 060 * @param prefix a mandatory prefix before search, separated for some weird optimization reason 061 * @param suffix a mandatory suffix after search, separated for some weird optimization reason 062 * @return the length of the searched-for prefix+search+suffix that was found 063 */ 064 public static int containsPart(CharSequence text, char[] search, CharSequence prefix, CharSequence suffix) 065 { 066 if(prefix == null) prefix = ""; 067 if(suffix == null) suffix = ""; 068 int bl = prefix.length(), el = suffix.length(); 069 if(text == null || text.length() == 0 || search == null || (search.length + bl + el <= 0)) 070 return 0; 071 int sl = bl + search.length + el, tl = text.length() - sl, f = 0, sl2 = sl - el; 072 char s = (bl <= 0) ? (search.length <= 0 ? suffix.charAt(0) : search[0]) : prefix.charAt(0); 073 PRIMARY: 074 for (int i = 0; i <= tl; i++) { 075 if(text.charAt(i) == s) 076 { 077 for (int j = i+1, x = 1; x < sl; j++, x++) { 078 if(x < bl) 079 { 080 if (text.charAt(j) != prefix.charAt(x)) { 081 f = Math.max(f, x); 082 continue PRIMARY; 083 } 084 } 085 else if(x < sl2) 086 { 087 if (text.charAt(j) != search[x-bl]) { 088 f = Math.max(f, x); 089 continue PRIMARY; 090 } 091 } 092 else 093 { 094 if (text.charAt(j) != suffix.charAt(x - sl2)) { 095 f = Math.max(f, x); 096 continue PRIMARY; 097 } 098 } 099 } 100 return sl; 101 } 102 } 103 return f; 104 } 105 106 public static String join(CharSequence delimiter, CharSequence... elements) { 107 if (elements == null || elements.length == 0) return ""; 108 StringBuilder sb = new StringBuilder(64); 109 sb.append(elements[0]); 110 for (int i = 1; i < elements.length; i++) { 111 sb.append(delimiter).append(elements[i]); 112 } 113 return sb.toString(); 114 } 115 116 public static String join(CharSequence delimiter, Collection<? 
extends CharSequence> elements) { 117 if (elements == null || elements.isEmpty()) return ""; 118 StringBuilder sb = new StringBuilder(64); 119 Iterator<? extends CharSequence> it = elements.iterator(); 120 sb.append(it.next()); 121 while(it.hasNext()) { 122 sb.append(delimiter).append(it.next()); 123 } 124 return sb.toString(); 125 } 126 127 public static String joinArrays(CharSequence delimiter, char[]... elements) { 128 if (elements == null || elements.length == 0) return ""; 129 StringBuilder sb = new StringBuilder(64); 130 sb.append(elements[0]); 131 for (int i = 1; i < elements.length; i++) { 132 sb.append(delimiter).append(elements[i]); 133 } 134 return sb.toString(); 135 } 136 137 public static String join(CharSequence delimiter, long... elements) { 138 if (elements == null || elements.length == 0) return ""; 139 StringBuilder sb = new StringBuilder(64); 140 sb.append(elements[0]); 141 for (int i = 1; i < elements.length; i++) { 142 sb.append(delimiter).append(elements[i]); 143 } 144 return sb.toString(); 145 } 146 public static String join(CharSequence delimiter, double... elements) { 147 if (elements == null || elements.length == 0) return ""; 148 StringBuilder sb = new StringBuilder(64); 149 sb.append(elements[0]); 150 for (int i = 1; i < elements.length; i++) { 151 sb.append(delimiter).append(elements[i]); 152 } 153 return sb.toString(); 154 } 155 public static String join(CharSequence delimiter, int... elements) { 156 if (elements == null || elements.length == 0) return ""; 157 StringBuilder sb = new StringBuilder(64); 158 sb.append(elements[0]); 159 for (int i = 1; i < elements.length; i++) { 160 sb.append(delimiter).append(elements[i]); 161 } 162 return sb.toString(); 163 } 164 public static String join(CharSequence delimiter, float... 
elements) { 165 if (elements == null || elements.length == 0) return ""; 166 StringBuilder sb = new StringBuilder(64); 167 sb.append(elements[0]); 168 for (int i = 1; i < elements.length; i++) { 169 sb.append(delimiter).append(elements[i]); 170 } 171 return sb.toString(); 172 } 173 public static String join(CharSequence delimiter, short... elements) { 174 if (elements == null || elements.length == 0) return ""; 175 StringBuilder sb = new StringBuilder(64); 176 sb.append(elements[0]); 177 for (int i = 1; i < elements.length; i++) { 178 sb.append(delimiter).append(elements[i]); 179 } 180 return sb.toString(); 181 } 182 public static String join(CharSequence delimiter, char... elements) { 183 if (elements == null || elements.length == 0) return ""; 184 StringBuilder sb = new StringBuilder(64); 185 sb.append(elements[0]); 186 for (int i = 1; i < elements.length; i++) { 187 sb.append(delimiter).append(elements[i]); 188 } 189 return sb.toString(); 190 } 191 public static String join(CharSequence delimiter, byte... elements) { 192 if (elements == null || elements.length == 0) return ""; 193 StringBuilder sb = new StringBuilder(64); 194 sb.append(elements[0]); 195 for (int i = 1; i < elements.length; i++) { 196 sb.append(delimiter).append(elements[i]); 197 } 198 return sb.toString(); 199 } 200 public static String join(CharSequence delimiter, boolean... elements) { 201 if (elements == null || elements.length == 0) return ""; 202 StringBuilder sb = new StringBuilder(64); 203 sb.append(elements[0]); 204 for (int i = 1; i < elements.length; i++) { 205 sb.append(delimiter).append(elements[i]); 206 } 207 return sb.toString(); 208 } 209 210 /** 211 * Joins the items in {@code elements} by calling their toString method on them (or just using the String "null" for 212 * null items), and separating each item with {@code delimiter}. 
Unlike other join methods in this class, this does 213 * not take a vararg of Object items, since that would cause confusion with the overloads that take one object, such 214 * as {@link #join(CharSequence, Iterable)}; it takes a non-vararg Object array instead. 215 * @param delimiter the String or other CharSequence to separate items in elements with 216 * @param elements the Object items to stringify and join into one String; if the array is null or empty, this 217 * returns an empty String, and if items are null, they are shown as "null" 218 * @return the String representations of the items in elements, separated by delimiter and put in one String 219 */ 220 public static String join(CharSequence delimiter, Object[] elements) { 221 if (elements == null || elements.length == 0) return ""; 222 StringBuilder sb = new StringBuilder(64); 223 sb.append(elements[0]); 224 for (int i = 1; i < elements.length; i++) { 225 sb.append(delimiter).append(elements[i]); 226 } 227 return sb.toString(); 228 } 229 /** 230 * Joins the items in {@code elements} by calling their toString method on them (or just using the String "null" for 231 * null items), and separating each item with {@code delimiter}. This can take any Iterable of any type for its 232 * elements parameter. 
233 * @param delimiter the String or other CharSequence to separate items in elements with 234 * @param elements the Object items to stringify and join into one String; if Iterable is null or empty, this 235 * returns an empty String, and if items are null, they are shown as "null" 236 * @return the String representations of the items in elements, separated by delimiter and put in one String 237 */ 238 public static String join(CharSequence delimiter, Iterable<?> elements) { 239 if (elements == null) return ""; 240 Iterator<?> it = elements.iterator(); 241 if(!it.hasNext()) return ""; 242 StringBuilder sb = new StringBuilder(64); 243 sb.append(it.next()); 244 while(it.hasNext()) { 245 sb.append(delimiter).append(it.next()); 246 } 247 return sb.toString(); 248 } 249 250 /** 251 * Joins the boolean array {@code elements} without delimiters into a String, using "1" for true and "0" for false. 252 * @param elements an array or vararg of booleans 253 * @return a String using 1 for true elements and 0 for false, or "N" if elements is null 254 */ 255 public static String joinAlt(boolean... elements) { 256 if (elements == null) return "N"; 257 if(elements.length == 0) return ""; 258 StringBuilder sb = new StringBuilder(64); 259 for (int i = 0; i < elements.length; i++) { 260 sb.append(elements[i] ? '1' : '0'); 261 } 262 return sb.toString(); 263 } 264 265 /** 266 * Like {@link #join(CharSequence, long...)}, but this appends an 'L' to each number so they can be read in by Java. 267 * @param delimiter 268 * @param elements 269 * @return 270 */ 271 public static String joinAlt(CharSequence delimiter, long... 
elements) { 272 if (elements == null || elements.length == 0) return ""; 273 StringBuilder sb = new StringBuilder(elements.length << 2); 274 sb.append(elements[0]).append('L'); 275 for (int i = 1; i < elements.length; i++) { 276 sb.append(delimiter).append(elements[i]).append('L'); 277 } 278 return sb.toString(); 279 } 280 /** 281 * Scans repeatedly in {@code source} for the String {@code search}, not scanning the same char twice except as part 282 * of a larger String, and returns the number of instances of search that were found, or 0 if source is null or if 283 * search is null or empty. 284 * @param source a String to look through 285 * @param search a String to look for 286 * @return the number of times search was found in source 287 */ 288 public static int count(final String source, final String search) 289 { 290 if(source == null || search == null || source.isEmpty() || search.isEmpty()) 291 return 0; 292 int amount = 0, idx = -1; 293 while ((idx = source.indexOf(search, idx+1)) >= 0) 294 ++amount; 295 return amount; 296 } 297 298 /** 299 * Scans repeatedly in {@code source} for the codepoint {@code search} (which is usually a char literal), not 300 * scanning the same section twice, and returns the number of instances of search that were found, or 0 if source is 301 * null. 
302 * @param source a String to look through 303 * @param search a codepoint or char to look for 304 * @return the number of times search was found in source 305 */ 306 public static int count(final String source, final int search) 307 { 308 if(source == null || source.isEmpty()) 309 return 0; 310 int amount = 0, idx = -1; 311 while ((idx = source.indexOf(search, idx+1)) >= 0) 312 ++amount; 313 return amount; 314 } 315 /** 316 * Scans repeatedly in {@code source} (only using the area from startIndex, inclusive, to endIndex, exclusive) for 317 * the String {@code search}, not scanning the same char twice except as part of a larger String, and returns the 318 * number of instances of search that were found, or 0 if source or search is null or if the searched area is empty. 319 * If endIndex is negative, this will search from startIndex until the end of the source. 320 * @param source a String to look through 321 * @param search a String to look for 322 * @param startIndex the first index to search through, inclusive 323 * @param endIndex the last index to search through, exclusive; if negative this will search the rest of source 324 * @return the number of times search was found in source 325 */ 326 public static int count(final String source, final String search, final int startIndex, int endIndex) 327 { 328 if(endIndex < 0) endIndex = 0x7fffffff; 329 if(source == null || search == null || source.isEmpty() || search.isEmpty() 330 || startIndex < 0 || startIndex >= endIndex) 331 return 0; 332 int amount = 0, idx = startIndex-1; 333 while ((idx = source.indexOf(search, idx+1)) >= 0 && idx < endIndex) 334 ++amount; 335 return amount; 336 } 337 338 /** 339 * Scans repeatedly in {@code source} (only using the area from startIndex, inclusive, to endIndex, exclusive) for 340 * the codepoint {@code search} (which is usually a char literal), not scanning the same section twice, and returns 341 * the number of instances of search that were found, or 0 if source is null or if 
the searched area is empty. 342 * If endIndex is negative, this will search from startIndex until the end of the source. 343 * @param source a String to look through 344 * @param search a codepoint or char to look for 345 * @param startIndex the first index to search through, inclusive 346 * @param endIndex the last index to search through, exclusive; if negative this will search the rest of source 347 * @return the number of times search was found in source 348 */ 349 public static int count(final String source, final int search, final int startIndex, int endIndex) 350 { 351 if(endIndex < 0) endIndex = 0x7fffffff; 352 if(source == null || source.isEmpty() || startIndex < 0 || startIndex >= endIndex) 353 return 0; 354 int amount = 0, idx = startIndex-1; 355 while ((idx = source.indexOf(search, idx+1)) >= 0 && idx < endIndex) 356 ++amount; 357 return amount; 358 } 359 360 /** 361 * Like {@link String#substring(int, int)} but returns "" instead of throwing any sort of Exception. 362 * @param source the String to get a substring from 363 * @param beginIndex the first index, inclusive; will be treated as 0 if negative 364 * @param endIndex the index after the last character (exclusive); if negative this will be source.length() 365 * @return the substring of source between beginIndex and endIndex, or "" if any parameters are null/invalid 366 */ 367 public static String safeSubstring(String source, int beginIndex, int endIndex) 368 { 369 if(source == null || source.isEmpty()) return ""; 370 if(beginIndex < 0) beginIndex = 0; 371 if(endIndex < 0 || endIndex > source.length()) endIndex = source.length(); 372 if(beginIndex > endIndex) return ""; 373 return source.substring(beginIndex, endIndex); 374 } 375 376 /** 377 * Like {@link String#split(String)} but doesn't use any regex for splitting (delimiter is a literal String). 
378 * @param source the String to get split-up substrings from 379 * @param delimiter the literal String to split on (not a regex); will not be included in the returned String array 380 * @return a String array consisting of at least one String (all of Source if nothing was split) 381 */ 382 public static String[] split(String source, String delimiter) { 383 int amount = count(source, delimiter); 384 if (amount <= 0) return new String[]{source}; 385 String[] splat = new String[amount+1]; 386 int dl = delimiter.length(), idx = -dl, idx2; 387 for (int i = 0; i < amount; i++) { 388 splat[i] = safeSubstring(source, idx+dl, idx = source.indexOf(delimiter, idx+dl)); 389 } 390 if((idx2 = source.indexOf(delimiter, idx+dl)) < 0) 391 { 392 splat[amount] = safeSubstring(source, idx+dl, source.length()); 393 } 394 else 395 { 396 splat[amount] = safeSubstring(source, idx+dl, idx2); 397 } 398 return splat; 399 } 400 401 public static final String mask64 = "0000000000000000000000000000000000000000000000000000000000000000", 402 mask32 = "00000000000000000000000000000000", 403 mask16 = "0000000000000000", 404 mask8 = "00000000"; 405 406 private static final char[] keyBase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=".toCharArray(), 407 valBase64 = new char[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 408 62, 0, 0, 0, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 64, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 409 0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0}; 410 411 private static final StringBuilder hexBuilder = new StringBuilder(16).append("0000000000000000"); 412 public static String hex(long number) { 413 for (int i = 0; i < 16; i++) { 414 hexBuilder.setCharAt(15 - i, hexDigits[(int)(number >> (i << 2) & 15)]); 
415 } 416 return hexBuilder.toString(); 417 } 418 419 public static String hex(double number) { 420 // avoids creating temporary long values, which can be slow on GWT 421 int h = NumberTools.doubleToLowIntBits(number); 422 for (int i = 0; i < 8; i++) { 423 hexBuilder.setCharAt(15 - i, hexDigits[(h >> (i << 2) & 15)]); 424 } 425 h = NumberTools.doubleToHighIntBits(number); 426 for (int i = 0; i < 8; i++) { 427 hexBuilder.setCharAt(7 - i, hexDigits[(h >> (i << 2) & 15)]); 428 } 429 return hexBuilder.toString(); 430 } 431 432 public static String hex(int number) { 433 for (int i = 0; i < 8; i++) { 434 hexBuilder.setCharAt(7 - i, hexDigits[(number >> (i << 2) & 15)]); 435 } 436 return hexBuilder.substring(0, 8); 437 } 438 439 public static String hex(float number) { 440 final int h = NumberTools.floatToIntBits(number); 441 for (int i = 0; i < 8; i++) { 442 hexBuilder.setCharAt(7 - i, hexDigits[(h >> (i << 2) & 15)]); 443 } 444 return hexBuilder.substring(0, 8); 445 } 446 447 public static String hex(short number) { 448 for (int i = 0; i < 4; i++) { 449 hexBuilder.setCharAt(3 - i, hexDigits[(number >> (i << 2) & 15)]); 450 } 451 return hexBuilder.substring(0, 4); 452 } 453 454 public static String hex(char number) { 455 for (int i = 0; i < 4; i++) { 456 hexBuilder.setCharAt(3 - i, hexDigits[(number >> (i << 2) & 15)]); 457 } 458 return hexBuilder.substring(0, 4); 459 } 460 461 public static String hex(byte number) { 462 hexBuilder.setCharAt(0, hexDigits[(number >> 4 & 15)]); 463 hexBuilder.setCharAt(1, hexDigits[(number & 15)]); 464 return hexBuilder.substring(0, 2); 465 } 466 467 public static StringBuilder appendHex(StringBuilder builder, long number){ 468 for (int i = 60; i >= 0; i -= 4) { 469 builder.append(hexDigits[(int)(number >> i & 15)]); 470 } 471 return builder; 472 } 473 public static StringBuilder appendHex(StringBuilder builder, double number){ 474 // avoids creating temporary long values, which can be slow on GWT 475 int h = 
NumberTools.doubleToHighIntBits(number); 476 for (int i = 28; i >= 0; i -= 4) { 477 builder.append(hexDigits[(h >> i & 15)]); 478 } 479 h = NumberTools.doubleToLowIntBits(number); 480 for (int i = 28; i >= 0; i -= 4) { 481 builder.append(hexDigits[(h >> i & 15)]); 482 } 483 return builder; 484 } 485 public static StringBuilder appendHex(StringBuilder builder, int number){ 486 for (int i = 28; i >= 0; i -= 4) { 487 builder.append(hexDigits[(number >> i & 15)]); 488 } 489 return builder; 490 } 491 public static StringBuilder appendHex(StringBuilder builder, float number){ 492 final int h = NumberTools.floatToIntBits(number); 493 for (int i = 28; i >= 0; i -= 4) { 494 builder.append(hexDigits[(h >> i & 15)]); 495 } 496 return builder; 497 } 498 public static StringBuilder appendHex(StringBuilder builder, short number){ 499 for (int i = 12; i >= 0; i -= 4) { 500 builder.append(hexDigits[(number >> i & 15)]); 501 } 502 return builder; 503 } 504 public static StringBuilder appendHex(StringBuilder builder, char number){ 505 for (int i = 12; i >= 0; i -= 4) { 506 builder.append(hexDigits[(number >> i & 15)]); 507 } 508 return builder; 509 } 510 public static StringBuilder appendHex(StringBuilder builder, byte number){ 511 builder.append(hexDigits[(number >> 4 & 15)]); 512 builder.append(hexDigits[(number & 15)]); 513 return builder; 514 } 515 516 public static String hex(long[] numbers) { 517 int len; 518 if (numbers == null || (len = numbers.length) <= 0) return ""; 519 StringBuilder sb = new StringBuilder(numbers.length << 4); 520 for (int i = 0; i < len; i++) { 521 appendHex(sb, numbers[i]); 522 } 523 return sb.toString(); 524 } 525 526 public static String hex(double[] numbers) { 527 int len; 528 if (numbers == null || (len = numbers.length) <= 0) return ""; 529 StringBuilder sb = new StringBuilder(numbers.length << 4); 530 for (int i = 0; i < len; i++) { 531 appendHex(sb, numbers[i]); 532 } 533 return sb.toString(); 534 } 535 536 public static String hex(int[] 
numbers) { 537 int len; 538 if (numbers == null || (len = numbers.length) <= 0) return ""; 539 StringBuilder sb = new StringBuilder(numbers.length << 3); 540 for (int i = 0; i < len; i++) { 541 appendHex(sb, numbers[i]); 542 } 543 return sb.toString(); 544 } 545 546 547 public static String hex(float[] numbers) { 548 int len; 549 if (numbers == null || (len = numbers.length) <= 0) return ""; 550 StringBuilder sb = new StringBuilder(numbers.length << 3); 551 for (int i = 0; i < len; i++) { 552 appendHex(sb, numbers[i]); 553 } 554 return sb.toString(); 555 } 556 557 public static String hex(short[] numbers) { 558 int len; 559 if (numbers == null || (len = numbers.length) <= 0) return ""; 560 StringBuilder sb = new StringBuilder(numbers.length << 2); 561 for (int i = 0; i < len; i++) { 562 appendHex(sb, numbers[i]); 563 } 564 return sb.toString(); 565 } 566 567 public static String hex(char[] numbers) { 568 int len; 569 if (numbers == null || (len = numbers.length) <= 0) return ""; 570 StringBuilder sb = new StringBuilder(numbers.length << 2); 571 for (int i = 0; i < len; i++) { 572 appendHex(sb, numbers[i]); 573 } 574 return sb.toString(); 575 } 576 577 public static String hex(byte[] numbers) { 578 int len; 579 if (numbers == null || (len = numbers.length) <= 0) return ""; 580 StringBuilder sb = new StringBuilder(numbers.length << 1); 581 for (int i = 0; i < len; i++) { 582 appendHex(sb, numbers[i]); 583 } 584 return sb.toString(); 585 } 586 587 public static String bin(long number) { 588 String h = Long.toBinaryString(number); 589 return mask64.substring(0, 64 - h.length()) + h; 590 } 591 592 public static String bin(int number) { 593 String h = Integer.toBinaryString(number); 594 return mask32.substring(0, 32 - h.length()) + h; 595 } 596 597 public static String bin(short number) { 598 String h = Integer.toBinaryString(number & 0xffff); 599 return mask16.substring(0, 16 - h.length()) + h; 600 } 601 602 public static String bin(char number) { 603 String h = 
Integer.toBinaryString(number & 0xffff); 604 return mask16.substring(0, 16 - h.length()) + h; 605 } 606 607 public static String bin(byte number) { 608 String h = Integer.toBinaryString(number & 0xff); 609 return mask8.substring(0, 8 - h.length()) + h; 610 } 611 private static final int[] hexCodes = new int[] 612 {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 613 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 614 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 615 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, 616 -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, 617 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 618 -1,10,11,12,13,14,15}; 619 620 /** 621 * Reads in a CharSequence containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at the start 622 * and returns the long they represent, reading at most 16 characters (17 if there is a sign) and returning the 623 * result if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also 624 * represent negative numbers as they are printed by such methods as String.format given a %x in the formatting 625 * string, or this class' {@link #hex(long)} method; that is, if the first char of a 16-char (or longer) 626 * CharSequence is a hex digit 8 or higher, then the whole number represents a negative number, using two's 627 * complement and so on. This means "FFFFFFFFFFFFFFFF" would return the long -1 when passed to this, though you 628 * could also simply use "-1 ". 629 * <br> 630 * Should be fairly close to Java 8's Long.parseUnsignedLong method, which is an odd omission from earlier JDKs. 631 * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or 632 * stopping the parse process early if a non-hex-digit char is read before the end of cs is reached. If the parse is 633 * stopped early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger 634 * places. 
635 * @param cs a CharSequence, such as a String, containing only hex digits with an optional sign (no 0x at the start) 636 * @return the long that cs represents 637 */ 638 public static long longFromHex(final CharSequence cs) { 639 return longFromHex(cs, 0, cs.length()); 640 } 641 /** 642 * Reads in a CharSequence containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at the start 643 * and returns the long they represent, reading at most 16 characters (17 if there is a sign) and returning the 644 * result if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also 645 * represent negative numbers as they are printed by such methods as String.format given a %x in the formatting 646 * string, or this class' {@link #hex(long)} method; that is, if the first char of a 16-char (or longer) 647 * CharSequence is a hex digit 8 or higher, then the whole number represents a negative number, using two's 648 * complement and so on. This means "FFFFFFFFFFFFFFFF" would return the long -1 when passed to this, though you 649 * could also simply use "-1 ". If you use both '-' at the start and have the most significant digit as 8 or higher, 650 * such as with "-FFFFFFFFFFFFFFFF", then both indicate a negative number, but the digits will be processed first 651 * (producing -1) and then the whole thing will be multiplied by -1 to flip the sign again (returning 1). 652 * <br> 653 * Should be fairly close to Java 8's Long.parseUnsignedLong method, which is an odd omission from earlier JDKs. 654 * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or 655 * stopping the parse process early if a non-hex-digit char is read before end is reached. If the parse is stopped 656 * early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger places. 
657 * @param cs a CharSequence, such as a String, containing only hex digits with an optional sign (no 0x at the start) 658 * @param start the (inclusive) first character position in cs to read 659 * @param end the (exclusive) last character position in cs to read (this stops after 16 characters if end is too large) 660 * @return the long that cs represents 661 */ 662 public static long longFromHex(final CharSequence cs, final int start, int end) { 663 int len, h, lim = 16; 664 if (cs == null || start < 0 || end <= 0 || end - start <= 0 665 || (len = cs.length()) - start <= 0 || end > len) 666 return 0; 667 char c = cs.charAt(start); 668 if (c == '-') { 669 len = -1; 670 h = 0; 671 lim = 17; 672 } else if (c == '+') { 673 len = 1; 674 h = 0; 675 lim = 17; 676 } else if (c > 102 || (h = hexCodes[c]) < 0) 677 return 0; 678 else { 679 len = 1; 680 } 681 long data = h; 682 for (int i = start + 1; i < end && i < start + lim; i++) { 683 if ((c = cs.charAt(i)) > 102 || (h = hexCodes[c]) < 0) 684 return data * len; 685 data <<= 4; 686 data |= h; 687 } 688 return data * len; 689 } 690 /** 691 * Reads in a char[] containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at the start and 692 * returns the long they represent, reading at most 16 characters (17 if there is a sign) and returning the result 693 * if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also represent 694 * negative numbers as they are printed by such methods as String.format given a %x in the formatting string, or 695 * this class' {@link #hex(long)} method; that is, if the first digit of a 16-char (or longer) char[] is a hex 696 * digit 8 or higher, then the whole number represents a negative number, using two's complement and so on. This 697 * means "FFFFFFFFFFFFFFFF" would return the long -1L when passed to this, though you could also simply use "-1 ". 
698 * If you use both '-' at the start and have the most significant digit as 8 or higher, such as with 699 * "-FFFFFFFFFFFFFFFF", then both indicate a negative number, but the digits will be processed first (producing -1) 700 * and then the whole thing will be multiplied by -1 to flip the sign again (returning 1). 701 * <br> 702 * Should be fairly close to Java 8's Long.parseUnsignedLong method, which is an odd omission from earlier JDKs. 703 * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or 704 * stopping the parse process early if a non-hex-digit char is read before end is reached. If the parse is stopped 705 * early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger places. 706 * @param cs a char array containing only hex digits with an optional sign (no 0x at the start) 707 * @param start the (inclusive) first character position in cs to read 708 * @param end the (exclusive) last character position in cs to read (this stops after 8 or 9 characters if end is too large, depending on sign) 709 * @return the long that cs represents 710 */ 711 public static long longFromHex(final char[] cs, final int start, int end) 712 { 713 int len, h, lim = 16; 714 if(cs == null || start < 0 || end <=0 || end - start <= 0 715 || (len = cs.length) - start <= 0 || end > len) 716 return 0; 717 char c = cs[start]; 718 if(c == '-') 719 { 720 len = -1; 721 h = 0; 722 lim = 17; 723 } 724 else if(c == '+') 725 { 726 len = 1; 727 h = 0; 728 lim = 17; 729 } 730 else if(c > 102 || (h = hexCodes[c]) < 0) 731 return 0; 732 else 733 { 734 len = 1; 735 } 736 int data = h; 737 for (int i = start + 1; i < end && i < start + lim; i++) { 738 if((c = cs[i]) > 102 || (h = hexCodes[c]) < 0) 739 return data * len; 740 data <<= 4; 741 data |= h; 742 } 743 return data * len; 744 } 745 746 /** 747 * Reads in a CharSequence containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at 
the start 748 * and returns the int they represent, reading at most 8 characters (9 if there is a sign) and returning the result 749 * if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also represent 750 * negative numbers as they are printed by such methods as String.format given a %x in the formatting string, or 751 * this class' {@link #hex(int)} method; that is, if the first digit of an 8-char (or longer) CharSequence is a hex 752 * digit 8 or higher, then the whole number represents a negative number, using two's complement and so on. This 753 * means "FFFFFFFF" would return the int -1 when passed to this, though you could also simply use "-1 ". If you use 754 * both '-' at the start and have the most significant digit as 8 or higher, such as with "-FFFFFFFF", then both 755 * indicate a negative number, but the digits will be processed first (producing -1) and then the whole thing will 756 * be multiplied by -1 to flip the sign again (returning 1). 757 * <br> 758 * Should be fairly close to Java 8's Integer.parseUnsignedInt method, which is an odd omission from earlier JDKs. 759 * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or 760 * stopping the parse process early if a non-hex-digit char is read before the end of cs is reached. If the parse is 761 * stopped early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger 762 * places. 
763 * @param cs a CharSequence, such as a String, containing only hex digits with an optional sign (no 0x at the start) 764 * @return the int that cs represents 765 */ 766 public static int intFromHex(final CharSequence cs) { 767 return intFromHex(cs, 0, cs.length()); 768 } 769 /** 770 * Reads in a CharSequence containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at the start 771 * and returns the int they represent, reading at most 8 characters (9 if there is a sign) and returning the result 772 * if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also represent 773 * negative numbers as they are printed by such methods as String.format given a %x in the formatting string, or 774 * this class' {@link #hex(int)} method; that is, if the first digit of an 8-char (or longer) CharSequence is a hex 775 * digit 8 or higher, then the whole number represents a negative number, using two's complement and so on. This 776 * means "FFFFFFFF" would return the int -1 when passed to this, though you could also simply use "-1 ". If you use 777 * both '-' at the start and have the most significant digit as 8 or higher, such as with "-FFFFFFFF", then both 778 * indicate a negative number, but the digits will be processed first (producing -1) and then the whole thing will 779 * be multiplied by -1 to flip the sign again (returning 1). 780 * <br> 781 * Should be fairly close to Java 8's Integer.parseUnsignedInt method, which is an odd omission from earlier JDKs. 782 * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or 783 * stopping the parse process early if a non-hex-digit char is read before end is reached. If the parse is stopped 784 * early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger places. 
785 * @param cs a CharSequence, such as a String, containing only hex digits with an optional sign (no 0x at the start) 786 * @param start the (inclusive) first character position in cs to read 787 * @param end the (exclusive) last character position in cs to read (this stops after 8 or 9 characters if end is too large, depending on sign) 788 * @return the int that cs represents 789 */ 790 public static int intFromHex(final CharSequence cs, final int start, int end) 791 { 792 int len, h, lim = 8; 793 if(cs == null || start < 0 || end <=0 || end - start <= 0 794 || (len = cs.length()) - start <= 0 || end > len) 795 return 0; 796 char c = cs.charAt(start); 797 if(c == '-') 798 { 799 len = -1; 800 h = 0; 801 lim = 9; 802 } 803 else if(c == '+') 804 { 805 len = 1; 806 h = 0; 807 lim = 9; 808 } 809 else if(c > 102 || (h = hexCodes[c]) < 0) 810 return 0; 811 else 812 { 813 len = 1; 814 } 815 int data = h; 816 for (int i = start + 1; i < end && i < start + lim; i++) { 817 if((c = cs.charAt(i)) > 102 || (h = hexCodes[c]) < 0) 818 return data * len; 819 data <<= 4; 820 data |= h; 821 } 822 return data * len; 823 } 824 /** 825 * Reads in a char[] containing only hex digits (only 0-9, a-f, and A-F) with an optional sign at the start 826 * and returns the int they represent, reading at most 8 characters (9 if there is a sign) and returning the result 827 * if valid, or 0 if nothing could be read. The leading sign can be '+' or '-' if present. This can also represent 828 * negative numbers as they are printed by such methods as String.format given a %x in the formatting string, or 829 * this class' {@link #hex(int)} method; that is, if the first digit of an 8-char (or longer) char[] is a hex 830 * digit 8 or higher, then the whole number represents a negative number, using two's complement and so on. This 831 * means "FFFFFFFF" would return the int -1 when passed to this, though you could also simply use "-1 ". 
If you use 832 * both '-' at the start and have the most significant digit as 8 or higher, such as with "-FFFFFFFF", then both 833 * indicate a negative number, but the digits will be processed first (producing -1) and then the whole thing will 834 * be multiplied by -1 to flip the sign again (returning 1). 835 * <br> 836 * Should be fairly close to Java 8's Integer.parseUnsignedInt method, which is an odd omission from earlier JDKs. 837 * This doesn't throw on invalid input, though, instead returning 0 if the first char is not a hex digit, or 838 * stopping the parse process early if a non-hex-digit char is read before end is reached. If the parse is stopped 839 * early, this behaves as you would expect for a number with less digits, and simply doesn't fill the larger places. 840 * @param cs a char array containing only hex digits with an optional sign (no 0x at the start) 841 * @param start the (inclusive) first character position in cs to read 842 * @param end the (exclusive) last character position in cs to read (this stops after 8 or 9 characters if end is too large, depending on sign) 843 * @return the int that cs represents 844 */ 845 public static int intFromHex(final char[] cs, final int start, int end) 846 { 847 int len, h, lim = 8; 848 if(cs == null || start < 0 || end <=0 || end - start <= 0 849 || (len = cs.length) - start <= 0 || end > len) 850 return 0; 851 char c = cs[start]; 852 if(c == '-') 853 { 854 len = -1; 855 h = 0; 856 lim = 9; 857 } 858 else if(c == '+') 859 { 860 len = 1; 861 h = 0; 862 lim = 9; 863 } 864 else if(c > 102 || (h = hexCodes[c]) < 0) 865 return 0; 866 else 867 { 868 len = 1; 869 } 870 int data = h; 871 for (int i = start + 1; i < end && i < start + lim; i++) { 872 if((c = cs[i]) > 102 || (h = hexCodes[c]) < 0) 873 return data * len; 874 data <<= 4; 875 data |= h; 876 } 877 return data * len; 878 } 879 /** 880 * Reads in a CharSequence containing only decimal digits (0-9) with an optional sign at the start and returns the 881 * 
long they represent, reading at most 19 characters (20 if there is a sign) and returning the result if valid, or 882 * 0 if nothing could be read. The leading sign can be '+' or '-' if present. Unlike 883 * {@link #intFromDec(CharSequence)}, this can't effectively be used to read unsigned longs as decimal literals, 884 * since anything larger than the highest signed long would be larger than the normal limit for longs as text (it 885 * would be 20 characters without a sign, where we limit it to 19 without a sign to match normal behavior). 886 * <br> 887 * Should be fairly close to the JDK's Long.parseLong method, but this also supports CharSequence data instead of 888 * just String data, and ignores chars after the number. This doesn't throw on invalid input, either, instead 889 * returning 0 if the first char is not a decimal digit, or stopping the parse process early if a non-decimal-digit 890 * char is read before the end of cs is reached. If the parse is stopped early, this behaves as you would expect for 891 * a number with less digits, and simply doesn't fill the larger places. 892 * @param cs a CharSequence, such as a String, containing only digits 0-9 with an optional sign 893 * @return the long that cs represents 894 */ 895 public static long longFromDec(final CharSequence cs) { 896 return longFromDec(cs,0, cs.length()); 897 } 898 /** 899 * Reads in a CharSequence containing only decimal digits (0-9) with an optional sign at the start and returns the 900 * long they represent between the given positions {@code start} and {@code end}, reading at most 19 characters (20 901 * if there is a sign) or until end is reached and returning the result if valid, or 0 if nothing could be read. The 902 * leading sign can be '+' or '-' if present. 
Unlike {@link #intFromDec(CharSequence, int, int)}, this can't 903 * effectively be used to read unsigned longs as decimal literals, since anything larger than the highest signed 904 * long would be larger than the normal limit for longs as text (it would be 20 characters without a sign, where we 905 * limit it to 19 without a sign to match normal behavior). 906 * <br> 907 * Should be fairly close to the JDK's Long.parseLong method, but this also supports CharSequence data instead of 908 * just String data, and allows specifying a start and end. This doesn't throw on invalid input, either, instead 909 * returning 0 if the first char is not a decimal digit, or stopping the parse process early if a non-decimal-digit 910 * char is read before end is reached. If the parse is stopped early, this behaves as you would expect for a number 911 * with less digits, and simply doesn't fill the larger places. 912 * @param cs a CharSequence, such as a String, containing only digits 0-9 with an optional sign 913 * @param start the (inclusive) first character position in cs to read 914 * @param end the (exclusive) last character position in cs to read (this stops after 19 or 20 characters if end is too large, depending on sign) 915 * @return the long that cs represents 916 */ 917 public static long longFromDec(final CharSequence cs, final int start, int end) 918 { 919 int len, h, lim = 19; 920 long sign = 1L; 921 if(cs == null || start < 0 || end <=0 || end - start <= 0 922 || (len = cs.length()) - start <= 0 || end > len) 923 return 0L; 924 char c = cs.charAt(start); 925 if(c == '-') 926 { 927 sign = -1L; 928 lim = 20; 929 h = 0; 930 } 931 else if(c == '+') 932 { 933 lim = 20; 934 h = 0; 935 } 936 else if(c > 102 || (h = hexCodes[c]) < 0 || h > 9) 937 return 0L; 938 long data = h; 939 for (int i = start + 1; i < end && i < start + lim; i++) { 940 if((c = cs.charAt(i)) > 102 || (h = hexCodes[c]) < 0 || h > 9) 941 return data * sign; 942 data = data * 10 + h; 943 } 944 return data 
* sign; 945 } 946 /** 947 * Reads in a CharSequence containing only decimal digits (0-9) with an optional sign at the start and returns the 948 * int they represent, reading at most 10 characters (11 if there is a sign) and returning the result if valid, or 0 949 * if nothing could be read. The leading sign can be '+' or '-' if present. This can technically be used to handle 950 * unsigned integers in decimal format, but it isn't the intended purpose. If you do use it for handling unsigned 951 * ints, 2147483647 is normally the highest positive int and -2147483648 the lowest negative one, but if you give 952 * this a number between 2147483647 and {@code 2147483647 + 2147483648}, it will interpret it as a negative number 953 * that fits in bounds using the normal rules for converting between signed and unsigned numbers. 954 * <br> 955 * Should be fairly close to the JDK's Integer.parseInt method, but this also supports CharSequence data instead of 956 * just String data, and ignores chars after the number. This doesn't throw on invalid input, either, instead 957 * returning 0 if the first char is not a decimal digit, or stopping the parse process early if a non-decimal-digit 958 * char is read before the end of cs is reached. If the parse is stopped early, this behaves as you would expect for 959 * a number with less digits, and simply doesn't fill the larger places. 960 * @param cs a CharSequence, such as a String, containing only digits 0-9 with an optional sign 961 * @return the int that cs represents 962 */ 963 public static int intFromDec(final CharSequence cs) { 964 return intFromDec(cs, 0, cs.length()); 965 } 966 /** 967 * Reads in a CharSequence containing only decimal digits (0-9) with an optional sign at the start and returns the 968 * int they represent, reading at most 10 characters (11 if there is a sign) and returning the result if valid, or 0 969 * if nothing could be read. The leading sign can be '+' or '-' if present. 
This can technically be used to handle 970 * unsigned integers in decimal format, but it isn't the intended purpose. If you do use it for handling unsigned 971 * ints, 2147483647 is normally the highest positive int and -2147483648 the lowest negative one, but if you give 972 * this a number between 2147483647 and {@code 2147483647 + 2147483648}, it will interpret it as a negative number 973 * that fits in bounds using the normal rules for converting between signed and unsigned numbers. 974 * <br> 975 * Should be fairly close to the JDK's Integer.parseInt method, but this also supports CharSequence data instead of 976 * just String data, and allows specifying a start and end. This doesn't throw on invalid input, either, instead 977 * returning 0 if the first char is not a decimal digit, or stopping the parse process early if a non-decimal-digit 978 * char is read before end is reached. If the parse is stopped early, this behaves as you would expect for a number 979 * with less digits, and simply doesn't fill the larger places. 
980 * @param cs a CharSequence, such as a String, containing only digits 0-9 with an optional sign 981 * @param start the (inclusive) first character position in cs to read 982 * @param end the (exclusive) last character position in cs to read (this stops after 10 or 11 characters if end is too large, depending on sign) 983 * @return the int that cs represents 984 */ 985 public static int intFromDec(final CharSequence cs, final int start, int end) 986 { 987 int len, h, lim = 10; 988 if(cs == null || start < 0 || end <=0 || end - start <= 0 989 || (len = cs.length()) - start <= 0 || end > len) 990 return 0; 991 char c = cs.charAt(start); 992 if(c == '-') 993 { 994 len = -1; 995 lim = 11; 996 h = 0; 997 } 998 else if(c == '+') 999 { 1000 len = 1; 1001 lim = 11; 1002 h = 0; 1003 } 1004 else if(c > 102 || (h = hexCodes[c]) < 0 || h > 9) 1005 return 0; 1006 else 1007 { 1008 len = 1; 1009 } 1010 int data = h; 1011 for (int i = start + 1; i < end && i < start + lim; i++) { 1012 if((c = cs.charAt(i)) > 102 || (h = hexCodes[c]) < 0 || h > 9) 1013 return data * len; 1014 data = data * 10 + h; 1015 } 1016 return data * len; 1017 } 1018 /** 1019 * Reads in a CharSequence containing only binary digits (only 0 and 1) and returns the long they represent, 1020 * reading at most 64 characters and returning the result if valid or 0 otherwise. The first digit is considered 1021 * the sign bit iff cs is 64 chars long. 1022 * <br> 1023 * Should be fairly close to Java 8's Long.parseUnsignedLong method, which is a bizarre omission from earlier JDKs. 1024 * This doesn't throw on invalid input, though, instead returning 0. 
1025 * @param cs a CharSequence, such as a String, containing only binary digits (nothing at the start) 1026 * @return the long that cs represents 1027 */ 1028 public static long longFromBin(CharSequence cs) 1029 { 1030 return longFromBin(cs, 0, cs.length()); 1031 } 1032 1033 /** 1034 * Reads in a CharSequence containing only binary digits (only 0 and 1) and returns the long they represent, 1035 * reading at most 64 characters and returning the result if valid or 0 otherwise. The first digit is considered 1036 * the sign bit iff cs is 64 chars long. 1037 * <br> 1038 * Should be fairly close to Java 8's Long.parseUnsignedLong method, which is a bizarre omission from earlier JDKs. 1039 * This doesn't throw on invalid input, though, instead returning 0. 1040 * @param cs a CharSequence, such as a String, containing only binary digits (nothing at the start) 1041 * @param start the first character position in cs to read from 1042 * @param end the last character position in cs to read from (this stops after 64 characters if end is too large) 1043 * @return the long that cs represents 1044 */ 1045 public static long longFromBin(CharSequence cs, final int start, final int end) 1046 { 1047 int len; 1048 if(cs == null || start < 0 || end <=0 || end - start <= 0 1049 || (len = cs.length()) - start <= 0 || end > len) 1050 return 0; 1051 char c = cs.charAt(start); 1052 if(c < '0' || c > '1') 1053 return 0; 1054 long data = hexCodes[c]; 1055 for (int i = start+1; i < end && i < start+64; i++) { 1056 if((c = cs.charAt(i)) < '0' || c > '1') 1057 return 0; 1058 data <<= 1; 1059 data |= c - '0'; 1060 } 1061 return data; 1062 } 1063 /** 1064 * Reads in a CharSequence containing only binary digits (only 0 and 1) and returns the int they represent, 1065 * reading at most 32 characters and returning the result if valid or 0 otherwise. The first digit is considered 1066 * the sign bit iff cs is 32 chars long. 
1067 * <br> 1068 * Should be fairly close to Java 8's Integer.parseUnsignedInt method, which is a bizarre omission from earlier 1069 * JDKs. This doesn't throw on invalid input, though, instead returning 0. 1070 * @param cs a CharSequence, such as a String, containing only binary digits (nothing at the start) 1071 * @return the int that cs represents 1072 */ 1073 public static int intFromBin(CharSequence cs) 1074 { 1075 return intFromBin(cs, 0, cs.length()); 1076 } 1077 1078 /** 1079 * Reads in a CharSequence containing only binary digits (only 0 and 1) and returns the int they represent, 1080 * reading at most 32 characters and returning the result if valid or 0 otherwise. The first digit is considered 1081 * the sign bit iff cs is 32 chars long. 1082 * <br> 1083 * Should be fairly close to Java 8's Integer.parseUnsignedInt method, which is a bizarre omission from earlier 1084 * JDKs. This doesn't throw on invalid input, though, instead returning 0. 1085 * @param cs a CharSequence, such as a String, containing only binary digits (nothing at the start) 1086 * @param start the first character position in cs to read from 1087 * @param end the last character position in cs to read from (this stops after 32 characters if end is too large) 1088 * @return the int that cs represents 1089 */ 1090 public static int intFromBin(CharSequence cs, final int start, final int end) 1091 { 1092 int len; 1093 if(cs == null || start < 0 || end <=0 || end - start <= 0 1094 || (len = cs.length()) - start <= 0 || end > len) 1095 return 0; 1096 char c = cs.charAt(start); 1097 if(c < '0' || c > '1') 1098 return 0; 1099 int data = hexCodes[c]; 1100 for (int i = start+1; i < end && i < start+32; i++) { 1101 if((c = cs.charAt(i)) < '0' || c > '1') 1102 return 0; 1103 data <<= 1; 1104 data |= c - '0'; 1105 } 1106 return data; 1107 } 1108 1109 /** 1110 * Base-64 encodes number and stores that string representation in buf starting at offset; uses 11 chars. 
1111 * 1112 * @param number the long to encode 1113 * @param offset the first position to set in buf 1114 * @param buf a char array that should be non-null and have length of at least offset + 11 1115 * @return buf, after modifying it in-place 1116 */ 1117 public static char[] b64Encode(long number, int offset, char[] buf) { 1118 if (buf != null && buf.length >= 11 - offset) { 1119 buf[offset] = keyBase64[(int) (number >>> 60)]; 1120 buf[offset + 1] = keyBase64[(int) (0x3f & number >>> 54)]; 1121 buf[offset + 2] = keyBase64[(int) (0x3f & number >>> 48)]; 1122 buf[offset + 3] = keyBase64[(int) (0x3f & number >>> 42)]; 1123 buf[offset + 4] = keyBase64[(int) (0x3f & number >>> 36)]; 1124 buf[offset + 5] = keyBase64[(int) (0x3f & number >>> 30)]; 1125 buf[offset + 6] = keyBase64[(int) (0x3f & number >>> 24)]; 1126 buf[offset + 7] = keyBase64[(int) (0x3f & number >>> 18)]; 1127 buf[offset + 8] = keyBase64[(int) (0x3f & number >>> 12)]; 1128 buf[offset + 9] = keyBase64[(int) (0x3f & number >>> 6)]; 1129 buf[offset + 10] = keyBase64[(int) (0x3f & number)]; 1130 } 1131 return buf; 1132 } 1133 1134 1135 /** 1136 * Base-64 encodes number and stores that string representation in buf starting at offset; uses 11 chars. 1137 * 1138 * @param number the double to encode 1139 * @param offset the first position to set in buf 1140 * @param buf a char array that should be non-null and have length of at least offset + 11 1141 * @return buf, after modifying it in-place 1142 */ 1143 public static char[] b64Encode(double number, int offset, char[] buf) { 1144 return b64Encode(NumberTools.doubleToLongBits(number), offset, buf); 1145 } 1146 1147 /** 1148 * Base-64 encodes number and stores that string representation in buf starting at offset; uses 6 chars. 
1149 * 1150 * @param number the int to encode 1151 * @param offset the first position to set in buf 1152 * @param buf a char array that should be non-null and have length of at least offset + 6 1153 * @return buf, after modifying it in-place 1154 */ 1155 public static char[] b64Encode(int number, int offset, char[] buf) { 1156 if (buf != null && buf.length >= 6 - offset) { 1157 buf[offset] = keyBase64[number >>> 30]; 1158 buf[offset + 1] = keyBase64[0x3f & number >>> 24]; 1159 buf[offset + 2] = keyBase64[0x3f & number >>> 18]; 1160 buf[offset + 3] = keyBase64[0x3f & number >>> 12]; 1161 buf[offset + 4] = keyBase64[0x3f & number >>> 6]; 1162 buf[offset + 5] = keyBase64[0x3f & number]; 1163 } 1164 return buf; 1165 } 1166 1167 /** 1168 * Base-64 encodes number and stores that string representation in buf starting at offset; uses 6 chars. 1169 * 1170 * @param number the float to encode 1171 * @param offset the first position to set in buf 1172 * @param buf a char array that should be non-null and have length of at least offset + 6 1173 * @return buf, after modifying it in-place 1174 */ 1175 public static char[] b64Encode(float number, int offset, char[] buf) { 1176 return b64Encode(NumberTools.floatToIntBits(number), offset, buf); 1177 } 1178 1179 /** 1180 * Base-64 encodes number and stores that string representation in buf starting at offset; uses 3 chars. 
1181 * 1182 * @param number the int to encode 1183 * @param offset the first position to set in buf 1184 * @param buf a char array that should be non-null and have length of at least offset + 3 1185 * @return buf, after modifying it in-place 1186 */ 1187 public static char[] b64Encode(short number, int offset, char[] buf) { 1188 if (buf != null && buf.length >= 3 - offset) { 1189 buf[offset] = keyBase64[number >>> 12]; 1190 buf[offset + 1] = keyBase64[0x3f & number >>> 6]; 1191 buf[offset + 2] = keyBase64[0x3f & number]; 1192 } 1193 return buf; 1194 } 1195 1196 /** 1197 * Base-64 encodes glyph and stores that string representation in buf starting at offset; uses 3 chars. 1198 * 1199 * @param glyph the char to encode 1200 * @param offset the first position to set in buf 1201 * @param buf a char array that should be non-null and have length of at least offset + 3 1202 * @return buf, after modifying it in-place 1203 */ 1204 public static char[] b64Encode(char glyph, int offset, char[] buf) { 1205 if (buf != null && buf.length >= 4 - offset) { 1206 buf[offset] = keyBase64[glyph >>> 12]; 1207 buf[offset + 1] = keyBase64[0x3f & glyph >>> 6]; 1208 buf[offset + 2] = keyBase64[0x3f & glyph]; 1209 } 1210 return buf; 1211 } 1212 1213 /** 1214 * Base-64 encodes number and stores that string representation in buf starting at offset; uses 2 chars. 1215 * 1216 * @param number the byte to encode 1217 * @param offset the first position to set in buf 1218 * @param buf a char array that should be non-null and have length of at least offset + 2 1219 * @return buf, after modifying it in-place 1220 */ 1221 public static char[] b64Encode(byte number, int offset, char[] buf) { 1222 if (buf != null && buf.length >= 2 - offset) { 1223 buf[offset] = keyBase64[number >>> 6]; 1224 buf[offset + 1] = keyBase64[0x3f & number]; 1225 } 1226 return buf; 1227 1228 } 1229 1230 /** 1231 * Decodes 11 characters from data starting from offset to get a long encoded as base-64. 
1232 * @param data a char array that should be have length of at least offset + 11 1233 * @param offset where in data to start reading from 1234 * @return the decoded long 1235 */ 1236 public static long b64DecodeLong(char[] data, int offset) { 1237 return (data == null || data.length < 11 + offset) ? 0L : 1238 (((long)valBase64[data[offset] & 0x7F]) << 60) 1239 | ((0x3fL & valBase64[data[offset + 1 ] & 0x7F]) << 54) 1240 | ((0x3fL & valBase64[data[offset + 2 ] & 0x7F]) << 48) 1241 | ((0x3fL & valBase64[data[offset + 3 ] & 0x7F]) << 42) 1242 | ((0x3fL & valBase64[data[offset + 4 ] & 0x7F]) << 36) 1243 | ((0x3fL & valBase64[data[offset + 5 ] & 0x7F]) << 30) 1244 | ((0x3fL & valBase64[data[offset + 6 ] & 0x7F]) << 24) 1245 | ((0x3fL & valBase64[data[offset + 7 ] & 0x7F]) << 18) 1246 | ((0x3fL & valBase64[data[offset + 8 ] & 0x7F]) << 12) 1247 | ((0x3fL & valBase64[data[offset + 9 ] & 0x7F]) << 6) 1248 | (0x3fL & valBase64[data[offset + 10] & 0x7F]); 1249 } 1250 1251 /** 1252 * Decodes 11 characters from data starting from offset to get a double encoded as base-64. 1253 * @param data a char array that should be have length of at least offset + 11 1254 * @param offset where in data to start reading from 1255 * @return the decoded double 1256 */ 1257 public static double b64DecodeDouble(char[] data, int offset) { 1258 return (data == null || data.length < 11 + offset) ? 
0.0 : 1259 NumberTools.longBitsToDouble((((long) valBase64[data[offset] & 0x7F]) << 60) 1260 | ((0x3fL & valBase64[data[offset + 1 ] & 0x7F]) << 54) 1261 | ((0x3fL & valBase64[data[offset + 2 ] & 0x7F]) << 48) 1262 | ((0x3fL & valBase64[data[offset + 3 ] & 0x7F]) << 42) 1263 | ((0x3fL & valBase64[data[offset + 4 ] & 0x7F]) << 36) 1264 | ((0x3fL & valBase64[data[offset + 5 ] & 0x7F]) << 30) 1265 | ((0x3fL & valBase64[data[offset + 6 ] & 0x7F]) << 24) 1266 | ((0x3fL & valBase64[data[offset + 7 ] & 0x7F]) << 18) 1267 | ((0x3fL & valBase64[data[offset + 8 ] & 0x7F]) << 12) 1268 | ((0x3fL & valBase64[data[offset + 9 ] & 0x7F]) << 6) 1269 | (0x3fL & valBase64[data[offset + 10] & 0x7F])); 1270 } 1271 1272 /** 1273 * Decodes 6 characters from data starting from offset to get an int encoded as base-64. 1274 * @param data a char array that should be have length of at least offset + 6 1275 * @param offset where in data to start reading from 1276 * @return the decoded int 1277 */ 1278 public static int b64DecodeInt(char[] data, int offset) { 1279 return (data == null || data.length < 6 + offset) ? 0 : 1280 ((valBase64[data[offset] & 0x7F]) << 30) 1281 | ((valBase64[data[offset + 1] & 0x7F]) << 24) 1282 | ((valBase64[data[offset + 2] & 0x7F]) << 18) 1283 | ((valBase64[data[offset + 3] & 0x7F]) << 12) 1284 | ((valBase64[data[offset + 4] & 0x7F]) << 6) 1285 | (valBase64[data[offset + 5] & 0x7F]); 1286 } 1287 1288 /** 1289 * Decodes 6 characters from data starting from offset to get a float encoded as base-64. 1290 * @param data a char array that should be have length of at least offset + 6 1291 * @param offset where in data to start reading from 1292 * @return the decoded float 1293 */ 1294 public static float b64DecodeFloat(char[] data, int offset) { 1295 return (data == null || data.length < 6 + offset) ? 
0f : 1296 NumberTools.intBitsToFloat(((valBase64[data[offset] & 0x7F]) << 30) 1297 | ((valBase64[data[offset + 1] & 0x7F]) << 24) 1298 | ((valBase64[data[offset + 2] & 0x7F]) << 18) 1299 | ((valBase64[data[offset + 3] & 0x7F]) << 12) 1300 | ((valBase64[data[offset + 4] & 0x7F]) << 6) 1301 | ( valBase64[data[offset + 5] & 0x7F])); 1302 } 1303 1304 /** 1305 * Decodes 3 characters from data starting from offset to get a short encoded as base-64. 1306 * @param data a char array that should be have length of at least offset + 3 1307 * @param offset where in data to start reading from 1308 * @return the decoded short 1309 */ 1310 public static short b64DecodeShort(char[] data, int offset) { 1311 return (short) ((data == null || data.length < 3 + offset) ? 0 : 1312 ((valBase64[data[offset] & 0x7F]) << 12) 1313 | ((valBase64[data[offset + 1] & 0x7F]) << 6) 1314 | ( valBase64[data[offset + 2] & 0x7F])); 1315 } 1316 /** 1317 * Decodes 3 characters from data starting from offset to get a char encoded as base-64. 1318 * @param data a char array that should be have length of at least offset + 3 1319 * @param offset where in data to start reading from 1320 * @return the decoded char 1321 */ 1322 public static char b64DecodeChar(char[] data, int offset) { 1323 return (char) ((data == null || data.length < 3 + offset) ? 0 : 1324 ((valBase64[data[offset] & 0x7F]) << 12) 1325 | ((valBase64[data[offset + 1] & 0x7F]) << 6) 1326 | ( valBase64[data[offset + 2] & 0x7F])); 1327 } 1328 1329 /** 1330 * Decodes 2 characters from data starting from offset to get a byte encoded as base-64. 1331 * @param data a char array that should be have length of at least offset + 2 1332 * @param offset where in data to start reading from 1333 * @return the decoded byte 1334 */ 1335 public static byte b64DecodeByte(char[] data, int offset) { 1336 return (byte) ((data == null || data.length < 2 + offset) ? 
0 : 1337 ((valBase64[data[offset] & 0x7F]) << 6) 1338 | (valBase64[data[offset + 1] & 0x7F])); 1339 } 1340 1341 public static String hexHash(boolean... array) { 1342 return hex(CrossHash.hash64(array)); 1343 } 1344 1345 public static String hexHash(byte... array) { 1346 return hex(CrossHash.hash64(array)); 1347 } 1348 1349 public static String hexHash(short... array) { 1350 return hex(CrossHash.hash64(array)); 1351 } 1352 1353 public static String hexHash(char... array) { 1354 return hex(CrossHash.hash64(array)); 1355 } 1356 1357 public static String hexHash(int... array) { 1358 return hex(CrossHash.hash64(array)); 1359 } 1360 1361 public static String hexHash(long... array) { 1362 return hex(CrossHash.hash64(array)); 1363 } 1364 1365 /** 1366 * If text is shorter than the given minimumLength, returns a String with text padded on the right with spaces until 1367 * it reaches that length; otherwise it simply returns text. 1368 * @param text the text to pad if necessary 1369 * @param minimumLength the minimum length of String to return 1370 * @return text, potentially padded with spaces to reach the given minimum length 1371 */ 1372 public static String padRight(String text, int minimumLength) 1373 { 1374 if(text.length() < minimumLength) 1375 return padRightStrict(text, ' ', minimumLength); 1376 return text; 1377 } 1378 1379 /** 1380 * If text is shorter than the given minimumLength, returns a String with text padded on the right with padChar 1381 * until it reaches that length; otherwise it simply returns text. 
1382 * @param text the text to pad if necessary 1383 * @param padChar the char to use to pad text, if necessary 1384 * @param minimumLength the minimum length of String to return 1385 * @return text, potentially padded with padChar to reach the given minimum length 1386 */ 1387 public static String padRight(String text, char padChar, int minimumLength) 1388 { 1389 if(text.length() < minimumLength) 1390 return padRightStrict(text, padChar, minimumLength); 1391 return text; 1392 } 1393 1394 /** 1395 * Constructs a String with exactly the given totalLength by taking text (or a substring of it) and padding it on 1396 * its right side with spaces until totalLength is reached. If text is longer than totalLength, this only uses the 1397 * portion of text needed to fill totalLength, and no more. 1398 * @param text the String to pad if necessary, or truncate if too long 1399 * @param totalLength the exact length of String to return 1400 * @return a String with exactly totalLength for its length, made from text and possibly extra spaces 1401 */ 1402 public static String padRightStrict(String text, int totalLength) { 1403 return padRightStrict(text, ' ', totalLength); 1404 } 1405 1406 /** 1407 * Constructs a String with exactly the given totalLength by taking text (or a substring of it) and padding it on 1408 * its right side with padChar until totalLength is reached. If text is longer than totalLength, this only uses the 1409 * portion of text needed to fill totalLength, and no more. 
1410 * @param text the String to pad if necessary, or truncate if too long 1411 * @param padChar the char to use to fill any remaining length 1412 * @param totalLength the exact length of String to return 1413 * @return a String with exactly totalLength for its length, made from text and possibly padChar 1414 */ 1415 public static String padRightStrict(String text, char padChar, int totalLength) { 1416 char[] c = new char[totalLength]; 1417 int len = text.length(); 1418 text.getChars(0, Math.min(len, totalLength), c, 0); 1419 for (int i = len; i < totalLength; i++) { 1420 c[i] = padChar; 1421 } 1422 return String.valueOf(c); 1423 } 1424 1425 /** 1426 * If text is shorter than the given minimumLength, returns a String with text padded on the left with spaces until 1427 * it reaches that length; otherwise it simply returns text. 1428 * @param text the text to pad if necessary 1429 * @param minimumLength the minimum length of String to return 1430 * @return text, potentially padded with spaces to reach the given minimum length 1431 */ 1432 public static String padLeft(String text, int minimumLength) 1433 { 1434 if(text.length() < minimumLength) 1435 return padLeftStrict(text, ' ', minimumLength); 1436 return text; 1437 } 1438 /** 1439 * If text is shorter than the given minimumLength, returns a String with text padded on the left with padChar until 1440 * it reaches that length; otherwise it simply returns text. 
1441 * @param text the text to pad if necessary 1442 * @param padChar the char to use to pad text, if necessary 1443 * @param minimumLength the minimum length of String to return 1444 * @return text, potentially padded with padChar to reach the given minimum length 1445 */ 1446 public static String padLeft(String text, char padChar, int minimumLength) 1447 { 1448 if(text.length() < minimumLength) 1449 return padLeftStrict(text, padChar, minimumLength); 1450 return text; 1451 } 1452 1453 /** 1454 * Constructs a String with exactly the given totalLength by taking text (or a substring of it) and padding it on 1455 * its left side with spaces until totalLength is reached. If text is longer than totalLength, this only uses the 1456 * portion of text needed to fill totalLength, and no more. 1457 * @param text the String to pad if necessary, or truncate if too long 1458 * @param totalLength the exact length of String to return 1459 * @return a String with exactly totalLength for its length, made from text and possibly extra spaces 1460 */ 1461 public static String padLeftStrict(String text, int totalLength) { 1462 return padLeftStrict(text, ' ', totalLength); 1463 } 1464 1465 /** 1466 * Constructs a String with exactly the given totalLength by taking text (or a substring of it) and padding it on 1467 * its left side with padChar until totalLength is reached. If text is longer than totalLength, this only uses the 1468 * portion of text needed to fill totalLength, and no more. 
1469 * @param text the String to pad if necessary, or truncate if too long 1470 * @param padChar the char to use to fill any remaining length 1471 * @param totalLength the exact length of String to return 1472 * @return a String with exactly totalLength for its length, made from text and possibly padChar 1473 */ 1474 public static String padLeftStrict(String text, char padChar, int totalLength) { 1475 char[] c = new char[totalLength]; 1476 int len = text.length(); 1477 text.getChars(0, Math.min(len, totalLength), c, Math.max(0, totalLength - len)); 1478 for (int i = totalLength - len - 1; i >= 0; i--) { 1479 c[i] = padChar; 1480 } 1481 return String.valueOf(c); 1482 } 1483 1484 /** 1485 * Word-wraps the given String (or other CharSequence, such as a StringBuilder) so it is split into zero or more 1486 * Strings as lines of text, with the given width as the maximum width for a line. This correctly splits most (all?) 1487 * text in European languages on spaces (treating all whitespace characters matched by the regex '\\s' as breaking), 1488 * and also uses the English-language rule (probably used in other languages as well) of splitting on hyphens and 1489 * other dash characters (Unicode category Pd) in the middle of a word. This means for a phrase like "UN Secretary 1490 * General Ban-Ki Moon", if the width was 12, then the Strings in the List returned would be 1491 * <br> 1492 * <pre> 1493 * "UN Secretary" 1494 * "General Ban-" 1495 * "Ki Moon" 1496 * </pre> 1497 * Spaces are not preserved if they were used to split something into two lines, but dashes are. 
1498 * @param longText a probably-large piece of text that needs to be split into multiple lines with a max width 1499 * @param width the max width to use for any line, removing trailing whitespace at the end of a line 1500 * @return a List of Strings for the lines after word-wrapping 1501 */ 1502 public static List<String> wrap(CharSequence longText, int width) 1503 { 1504 if(width <= 0) 1505 return new ArrayList<>(0); 1506 return wrap(new ArrayList<String>(longText.length() / width + 2), longText, width); 1507 } 1508 /** 1509 * Word-wraps the given String (or other CharSequence, such as a StringBuilder) so it is split into zero or more 1510 * Strings as lines of text, with the given width as the maximum width for a line; appends the word-wrapped lines to 1511 * the given List of Strings and does not create a new List. This correctly splits most (all?) text in European 1512 * languages on spaces (treating all whitespace characters matched by the regex '\\s' as breaking), and also uses 1513 * the English-language rule (probably used in other languages as well) of splitting on hyphens and other dash 1514 * characters (Unicode category Pd) in the middle of a word. This means for a phrase like "UN Secretary General 1515 * Ban-Ki Moon", if the width was 12, then the Strings in the List returned would be 1516 * <br> 1517 * <pre> 1518 * "UN Secretary" 1519 * "General Ban-" 1520 * "Ki Moon" 1521 * </pre> 1522 * Spaces are not preserved if they were used to split something into two lines, but dashes are. 
1523 * @param receiving the List of String to append the word-wrapped lines to 1524 * @param longText a probably-large piece of text that needs to be split into multiple lines with a max width 1525 * @param width the max width to use for any line, removing trailing whitespace at the end of a line 1526 * @return the given {@code receiving} parameter, after appending the lines from word-wrapping 1527 */ 1528 public static List<String> wrap(List<String> receiving, CharSequence longText, int width) 1529 { 1530 if(width <= 0 || receiving == null) 1531 return receiving; 1532 Matcher widthMatcher = Pattern.compile("(?:({=Y}(?!\\s).{1," + width + "})((?<=\\p{Pd})|(\\s+)))|({=Y}\\S{1," + width + "})").matcher(longText + "\n"); 1533 while (widthMatcher.find()) 1534 { 1535 receiving.add(widthMatcher.group("Y")); 1536 } 1537 return receiving; 1538 } 1539 1540 public static String replace(CharSequence text, String before, String after) { 1541 if(text instanceof String) 1542 { 1543 return ((String)text).replace(before, after); 1544 } 1545 String t = text.toString(); 1546 return t.replace(before, after); 1547 } 1548 1549 public static final Pattern whitespacePattern = Pattern.compile("\\s+"), 1550 nonSpacePattern = Pattern.compile("\\S+"); 1551 private static final Matcher matcher = new Matcher(whitespacePattern); 1552 public static int indexOf(CharSequence text, Pattern regex, int beginIndex) 1553 { 1554 matcher.setPattern(regex); 1555 matcher.setTarget(text); 1556 matcher.setPosition(beginIndex); 1557 if(!matcher.find()) 1558 return -1; 1559 return matcher.start(); 1560 } 1561 public static int indexOf(CharSequence text, String regex, int beginIndex) 1562 { 1563 matcher.setPattern(Pattern.compile(regex)); 1564 matcher.setTarget(text); 1565 matcher.setPosition(beginIndex); 1566 if(!matcher.find()) 1567 return -1; 1568 return matcher.start(); 1569 } 1570 public static int indexOf(CharSequence text, Pattern regex) 1571 { 1572 matcher.setPattern(regex); 1573 
matcher.setTarget(text); 1574 if(!matcher.find()) 1575 return -1; 1576 return matcher.start(); 1577 } 1578 public static int indexOf(CharSequence text, String regex) 1579 { 1580 matcher.setPattern(Pattern.compile(regex)); 1581 matcher.setTarget(text); 1582 if(!matcher.find()) 1583 return -1; 1584 return matcher.start(); 1585 } 1586 private static final Matcher capitalizeMatcher = Pattern.compile("(?<!\\pL)(\\pL)(\\pL*)(\\PL*)").matcher(); 1587 private static final StringBuilder sb = new StringBuilder(64); 1588 1589 /** 1590 * Capitalizes Each Word In The Parameter {@code original}, Returning A New String. 1591 * @param original a CharSequence, such as a StringBuilder or String, which could have CrAzY capitalization 1592 * @return A String With Each Word Capitalized At The Start And The Rest In Lower Case 1593 */ 1594 public static String capitalize(final CharSequence original) { 1595 if (original == null || original.length() <= 0) { 1596 return ""; 1597 } 1598 sb.setLength(0); 1599 capitalizeMatcher.setTarget(original); 1600 while (capitalizeMatcher.find()) { 1601 sb.append(capitalizeMatcher.group(1).toUpperCase()); 1602 capitalizeMatcher.getGroup(2, sb, 1); // mode 1 is case-insensitive, which lower-cases result 1603 capitalizeMatcher.getGroup(3, sb); 1604 } 1605 return sb.toString(); 1606 } 1607 private static final Matcher sentenceMatcher = Pattern.compile("(\\PL*)((\\pL)([^.?!]*)($|[.?!]+))(\\PL*)").matcher(); 1608 // group 1 before letters, group 2 whole sentence, group 3 first letter, group 4 rest of sentence, group 5 closing punctuation, group 6 remainder of non-letters 1609 1610 /** 1611 * Attempts to scan for sentences in {@code original}, capitalizes the first letter of each sentence, and otherwise 1612 * leaves the CharSequence untouched as it returns it as a String. Sentences are detected with a crude heuristic of 1613 * "does it have periods, exclamation marks, or question marks at the end, or does it reach the end of input? 
If 1614 * yes, it's a sentence." 1615 * @param original a CharSequence that is expected to contain sentence-like data that needs capitalization; existing upper-case letters will stay upper-case. 1616 * @return a String where the first letter of each sentence (detected as best this can) is capitalized. 1617 */ 1618 public static String sentenceCase(final CharSequence original) { 1619 if (original == null || original.length() <= 0) { 1620 return ""; 1621 } 1622 sb.setLength(0); 1623 sentenceMatcher.setTarget(original); 1624 while (sentenceMatcher.find()) { 1625 sentenceMatcher.getGroup(1, sb); 1626 sb.append(sentenceMatcher.group(3).toUpperCase()); 1627 sentenceMatcher.getGroup(4, sb); // use getGroup(4, sb, 1) if this should lower-case the rest 1628 sentenceMatcher.getGroup(5, sb); 1629 sentenceMatcher.getGroup(6, sb); 1630 } 1631 return sb.toString(); 1632 } 1633 private static final Replacer anReplacer = new Replacer(Pattern.compile("\\b([Aa])(\\p{G}+)(?="+FakeLanguageGen.anyVowel+")", Pattern.IGNORE_CASE | Pattern.UNICODE), "$1n$2"); 1634 1635 /** 1636 * A simple method that looks for any occurrences of the word 'a' followed by some non-zero amount of whitespace and 1637 * then any vowel starting the following word (such as 'a item'), then replaces each such improper 'a' with 'an' 1638 * (such as 'an item'). The regex used here isn't bulletproof, but it should be fairly robust, handling when you 1639 * have multiple whitespace chars, different whitespace chars (like carriage return and newline), accented vowels in 1640 * the following word (but not in the initial 'a', which is expected to use English spelling rules), and the case of 1641 * the initial 'a' or 'A'. 1642 * <br> 1643 * Gotta love Regexodus; this is a two-liner that uses features specific to that regular expression library. 
1644 * @param text the (probably generated English) multi-word text to search for 'a' in and possibly replace with 'an' 1645 * @return a new String with every improper 'a' replaced 1646 */ 1647 public static String correctABeforeVowel(final CharSequence text){ 1648 return anReplacer.replace(text); 1649 } 1650 1651 /** 1652 * Constant storing the 16 hexadecimal digits, as char values, in order. 1653 */ 1654 public static final char[] hexDigits = { 1655 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' 1656 }; 1657 1658 /** 1659 * A constant containing only chars that are reasonably likely to be supported by broad fonts and thus display-able. 1660 * This assumes the font supports Latin, Greek, and Cyrillic alphabets, with good support for extended Latin (at 1661 * least for European languages) but not required to be complete enough to support the very large Vietnamese set of 1662 * extensions to Latin, nor to support any International Phonetic Alphabet (IPA) chars. It also assumes box drawing 1663 * characters are supported and a handful of common dingbats, such as male and female signs. It does not include 1664 * the tab, newline, or carriage return characters, since these don't usually make sense on a grid of chars. 
*/
    public static final String PERMISSIBLE_CHARS =
            " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmno"+
            "pqrstuvwxyz{|}~¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàá"+
            "âãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİı"+
            "ĴĵĶķĹĺĻļĽľĿŀŁłŃńŅņŇňŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƒǺǻǼǽǾǿ"+
            "ȘșȚțȷˆˇˉˋ˘˙˚˛˜˝΄΅Ά·ΈΉΊΌΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυ"+
            "φχψωϊϋόύώЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхц"+
            "чшщъыьэюяѐёђѓєѕіїјљњћќѝўџѴѵҐґẀẁẂẃẄẅỲỳ–—‘’‚‛“”„†‡•…‰‹›ⁿ₤€№™Ω℮←↑→↓∆−√≈" +
            "─│┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬■□▲▼○●◦♀♂♠♣♥♦♪";

    /**
     * The box-drawing chars that are made only of single lines.
     */
    public static final String BOX_DRAWING_SINGLE = "─│┌┐└┘├┤┬┴┼";
    /**
     * The box-drawing chars that are made only of double lines.
     */
    public static final String BOX_DRAWING_DOUBLE = "═║╔╗╚╝╠╣╦╩╬";
    /**
     * The single-line and double-line box-drawing chars, along with the ones that mix single and double lines.
     */
    public static final String BOX_DRAWING = "─│┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬";
    /**
     * Arrows, squares, triangles, circles, the female and male signs, the four card suits, and a musical note.
     */
    public static final String VISUAL_SYMBOLS = "←↑→↓■□▲▼○●◦♀♂♠♣♥♦♪";
    /**
     * The ten decimal digits, '0' through '9', in ascending order.
     */
    public static final String DIGITS = "0123456789";
    /**
     * Standalone accent-like marks (tilde, grave, circumflex, diaeresis, macron and similar) plus several single
     * quotation marks.
     */
    public static final String MARKS = "~`^'¨¯°´¸ˆˇˉˋ˘˙˚˛˜˝΄΅‘’‚‛";
    /**
     * Can be used to match an index with one in {@link #GROUPING_SIGNS_CLOSE} to find the closing char (this way only).
     */
    public static final String GROUPING_SIGNS_OPEN = "([{<«‘‛“‹";
    /**
     * An index in {@link #GROUPING_SIGNS_OPEN} can be used here to find the closing char for that opening one.
*/
    public static final String GROUPING_SIGNS_CLOSE = ")]}>»’’”›";
    /**
     * Punctuation found in everyday text, such as the exclamation mark, comma, period, question mark, and dashes.
     */
    public static final String COMMON_PUNCTUATION = "!\"%&'*+,-./:;<>?•…–—";
    /**
     * Signs such as the at sign, backslash, underscore, pipe, copyright, registered, and trademark signs.
     */
    public static final String MODERN_PUNCTUATION = "@\\^_`|~¦©®™´№♀♂♪";
    /**
     * Less frequently used signs, such as the section sign, pilcrow, inverted exclamation and question marks, and
     * daggers.
     */
    public static final String UNCOMMON_PUNCTUATION = "§¶¨ªº¯°·¸¡¿·‚„†‡";
    /**
     * Signs used in math and technical notation, such as operators, superscript digits, fractions, and the
     * square-root sign. Some of these chars also appear in {@link #COMMON_PUNCTUATION}.
     */
    public static final String TECHNICAL_PUNCTUATION = "#%'*+,-./<=>^|¬°µ±¹²³ⁿ¼½¾×÷‰№Ω℮∆−√≈";
    /**
     * The concatenation of the common, modern, uncommon, and technical punctuation constants followed by the opening
     * and closing grouping signs. A char that is present in more than one of those constants appears more than once
     * in this String.
     */
    public static final String PUNCTUATION = COMMON_PUNCTUATION + MODERN_PUNCTUATION + UNCOMMON_PUNCTUATION +
            TECHNICAL_PUNCTUATION + GROUPING_SIGNS_OPEN + GROUPING_SIGNS_CLOSE;
    /**
     * Currency signs, starting with the dollar sign and ending with the euro sign.
     */
    public static final String CURRENCY = "$¢£¤¥₤€";
    /**
     * A one-char String used for spacing; presumably just the ordinary space char, but confirm the exact code point
     * before relying on that.
     */
    public static final String SPACING = " ";
    /**
     * The 26 upper-case letters 'A' through 'Z', in alphabetical order.
     */
    public static final String ENGLISH_LETTERS_UPPER =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    /**
     * The 26 lower-case letters 'a' through 'z', in alphabetical order.
     */
    public static final String ENGLISH_LETTERS_LOWER =
            "abcdefghijklmnopqrstuvwxyz";
    /**
     * {@link #ENGLISH_LETTERS_UPPER} followed by {@link #ENGLISH_LETTERS_LOWER}.
     */
    public static final String ENGLISH_LETTERS = ENGLISH_LETTERS_UPPER + ENGLISH_LETTERS_LOWER;

    /**
     * Upper-case Latin letters outside the English alphabet (accented letters, ligatures, and so on). The String ends
     * with 'ß', 'S', 'F', and 'J'; NOTE(review): these look like stand-ins so that indices line up with the last
     * chars of {@link #LATIN_EXTENDED_LETTERS_LOWER} -- confirm before depending on index pairing.
     */
    public static final String LATIN_EXTENDED_LETTERS_UPPER =
            "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİĴĶĹĻĽĿŁŃŅŇŊŌŎŐŒŔŖŘŚŜŞŠŢŤŨŪŬŮŰŲŴŶŸŹŻŽǺǼǾȘȚẀẂẄỲßSFJ";
    /**
     * Lower-case Latin letters outside the English alphabet (accented letters, ligatures, and so on), listed in the
     * same order as {@link #LATIN_EXTENDED_LETTERS_UPPER}.
     */
    public static final String LATIN_EXTENDED_LETTERS_LOWER =
            "àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıĵķĺļľŀłńņňŋōŏőœŕŗřśŝşšţťũūŭůűųŵŷÿźżžǻǽǿșțẁẃẅỳßſƒȷ";
    /**
     * {@link #LATIN_EXTENDED_LETTERS_UPPER} followed by {@link #LATIN_EXTENDED_LETTERS_LOWER}.
     */
    public static final String LATIN_EXTENDED_LETTERS = LATIN_EXTENDED_LETTERS_UPPER + LATIN_EXTENDED_LETTERS_LOWER;

    /**
     * {@link #ENGLISH_LETTERS_UPPER} followed by {@link #LATIN_EXTENDED_LETTERS_UPPER}.
     */
    public static final String LATIN_LETTERS_UPPER = ENGLISH_LETTERS_UPPER + LATIN_EXTENDED_LETTERS_UPPER;
    /**
     * {@link #ENGLISH_LETTERS_LOWER} followed by {@link #LATIN_EXTENDED_LETTERS_LOWER}.
     */
    public static final String LATIN_LETTERS_LOWER = ENGLISH_LETTERS_LOWER + LATIN_EXTENDED_LETTERS_LOWER;
    /**
     * {@link #LATIN_LETTERS_UPPER} followed by {@link #LATIN_LETTERS_LOWER}.
     */
    public static final String LATIN_LETTERS = LATIN_LETTERS_UPPER + LATIN_LETTERS_LOWER;

    /**
     * Includes the letter Sigma, 'Σ', twice because it has two lower-case forms in {@link #GREEK_LETTERS_LOWER}. This
     * lets you use one index for both lower and upper case, like with Latin and Cyrillic.
*/
    public static final String GREEK_LETTERS_UPPER =
            "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏΪΫΪΫ";
    /**
     * Includes both lower-case forms for Sigma, 'ς' and 'σ', but this matches the two upper-case Sigma in
     * {@link #GREEK_LETTERS_UPPER}. This lets you use one index for both lower and upper case, like with Latin and
     * Cyrillic.
     */
    public static final String GREEK_LETTERS_LOWER =
            "αβγδεζηθικλμνξοπρςστυφχψωάέήίόύώϊϋΐΰ";

    /**
     * {@link #GREEK_LETTERS_UPPER} followed by {@link #GREEK_LETTERS_LOWER}.
     */
    public static final String GREEK_LETTERS = GREEK_LETTERS_UPPER + GREEK_LETTERS_LOWER;

    /**
     * Upper-case Cyrillic letters: the 32 letters from 'А' to 'Я' first, then additional letters such as 'Ё', 'Ї',
     * and 'Ґ'.
     */
    public static final String CYRILLIC_LETTERS_UPPER =
            "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏѴҐ";
    /**
     * Lower-case Cyrillic letters, listed in the same order as {@link #CYRILLIC_LETTERS_UPPER}.
     */
    public static final String CYRILLIC_LETTERS_LOWER =
            "абвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѵґ";
    /**
     * {@link #CYRILLIC_LETTERS_UPPER} followed by {@link #CYRILLIC_LETTERS_LOWER}.
     */
    public static final String CYRILLIC_LETTERS = CYRILLIC_LETTERS_UPPER + CYRILLIC_LETTERS_LOWER;

    /**
     * The upper-case Latin, Greek, and Cyrillic letters, concatenated in that order.
     */
    public static final String LETTERS_UPPER = LATIN_LETTERS_UPPER + GREEK_LETTERS_UPPER + CYRILLIC_LETTERS_UPPER;
    /**
     * The lower-case Latin, Greek, and Cyrillic letters, concatenated in that order.
     */
    public static final String LETTERS_LOWER = LATIN_LETTERS_LOWER + GREEK_LETTERS_LOWER + CYRILLIC_LETTERS_LOWER;
    /**
     * {@link #LETTERS_UPPER} followed by {@link #LETTERS_LOWER}.
     */
    public static final String LETTERS = LETTERS_UPPER + LETTERS_LOWER;
    /**
     * {@link #LETTERS} followed by {@link #DIGITS}.
     */
    public static final String LETTERS_AND_NUMBERS = LETTERS + DIGITS;
}