package squidpony;

import regexodus.Matcher;
import regexodus.Pattern;
import regexodus.REFlags;
import squidpony.squidmath.*;

import java.io.Reader;
import java.io.Serializable;
import java.util.*;

import static squidpony.ArrayTools.letters;

/**
 * A simple format parser for String-based configuration or data files where JSON is overkill.
 * Supports only one type, String, but allows each String to have arbitrary nested levels of
 * String children as if in sub-lists. You can interpret the Strings however you want, and
 * quoting each String isn't necessary if they are just one word ("bare words" are allowed).
 * This stores its items in an inner class, {@link ObTextEntry}, which only has a "primary"
 * String and may have a List of "associated" ObTextEntry values, each of which must have
 * their own primary String and which may have their own associated List.
 * <br>
 * You can use this like any other List, though it will contain ObTextEntry objects instead
 * of Strings directly. This allows you to control whether you want to iterate through a
 * particular primary String's associated entries, if there are any, or to skip over them and
 * go to the next String in the current List.
 * <br>
 * This extends ArrayList of ObTextEntry and is modifiable, but it doesn't act quite like
 * what you might expect from an ArrayList. Chiefly, this only considers the top-level Strings
 * to be part of the List for length and for {@link #contains(Object)}, and will ignore child
 * strings unless you access them via the {@link ObTextEntry#associated} List on an entry that
 * has associated entries.
 * <br>
 * A common way to use this is with {@link #parse(CharSequence)} to read a String in the
 * following format.
 * <br>
 * Format example:
 * <pre>
 * hello world
 * 'how are you today?' [just great thanks]
 * hooray!
 *
 * complexity?
 * [it is possible [yes this is a good example]
 * 'escapes like \[\'\] all work'
 * "you can use double or single quotes to allow spaces and brackets in one string"
 * ]
 *
 * comments are allowed // like this
 * comments can have different forms # like this
 * // block comments like in c are allowed
 * / * but because this example is in javadoc, this example is not actually a comment * /
 * // remove the spaces between each slash and asterisk to make the last line a comment.
 * /[delimit/or block comments with delimiters/delimit]/
 *
 * '''
 * raw strings (heredocs) look like this normally.
 * they permit characters without escapes, ]][][[ \/\/\ ,
 * except for triple quotes.
 * they keep newlines and indentation intact,
 * except for up to one newline ignored adjacent to each triple quote.
 * '''
 *
 * [[different[
 * if you may need triple quotes
 * in the raw string, use a different syntax that allows delimiters.
 * here, the delimiter is '''different''', just to be different.]different]]
 * </pre>
 * <br>
 * Inspired strongly by <a href="https://github.com/igagis/stob">STOB</a> and its
 * <a href="https://github.com/igagis/stob-java">Java port</a>, but no code is shared and the format is
 * slightly different. The main differences are:
 * <ul>
 * <li>We use square brackets in place of STOB's curly braces to mark children associated with a string.</li>
 * <li>ObText supports nested block comments using the syntax {@code /[delimiter/contents/delimiter]/} where
 * delimiter may be empty but must match on both sides, and contents is the body of the comment.</li>
 * <li>ObText uses Python-like "heredoc" syntax for raw strings surrounded by triple-apostrophes '''like so'''
 * with optional initial and final newlines in the raw string ignored. An alternate raw string
 * syntax is present that allows delimiters, using the syntax {@code [[delimiter[contents]delimiter]]}, where
 * again delimiter may be empty and contents is the body of the raw string.</li>
 * </ul>
 * <br>
 * Parsing and quoting go through static {@link Matcher} fields that are shared by every ObText, so
 * {@link #parse(CharSequence)}, the appendQuoted methods, {@link #serializeToString()} and {@link #toString()}
 * should not be called from several threads at the same time.
 */
public class ObText extends ArrayList<ObText.ObTextEntry> implements Serializable {
    private static final long serialVersionUID = 7L;

    /**
     * One item in an ObText: a {@link #primary} String plus an optional List of {@link #associated} child entries,
     * each of which is an ObTextEntry with the same structure.
     */
    public static class ObTextEntry implements Serializable
    {
        private static final long serialVersionUID = 7L;
        /**
         * The String this entry holds; this stays null if the no-argument constructor was used and nothing was
         * assigned afterwards.
         */
        public String primary;
        /**
         * The child entries of this entry; null until children are added or {@link #openAssociated()} is called.
         */
        public ArrayList<ObTextEntry> associated;

        /**
         * Makes an entry with a null {@link #primary} and no associated entries.
         */
        public ObTextEntry()
        {
        }

        /**
         * Makes an entry with the given primary String and no associated entries.
         * @param primaryString the String to store as {@link #primary}
         */
        public ObTextEntry(String primaryString)
        {
            primary = primaryString;
        }

        /**
         * Makes an entry with the given primary String and a copy of the given Collection as its associated entries.
         * @param primaryString the String to store as {@link #primary}
         * @param associatedStrings entries to copy into a new {@link #associated} list; must not be null
         */
        public ObTextEntry(String primaryString, Collection<ObTextEntry> associatedStrings)
        {
            primary = primaryString;
            associated = new ArrayList<>(associatedStrings);
        }

        /**
         * Appends an existing entry to {@link #associated}, creating that list first if needed.
         * @param entry the entry to add as a child of this one
         */
        public void add(ObTextEntry entry)
        {
            if(associated == null)
                associated = new ArrayList<>(16);
            associated.add(entry);
        }

        /**
         * Wraps {@code text} in a new entry and appends it to {@link #associated}, creating that list first if needed.
         * @param text the primary String of the child entry to add
         */
        public void add(String text)
        {
            if(associated == null)
                associated = new ArrayList<>(16);
            associated.add(new ObTextEntry(text));
        }

        /**
         * Checks whether this entry has at least one associated entry.
         * @return true if {@link #associated} is non-null and non-empty
         */
        public boolean hasAssociated()
        {
            return associated != null && !associated.isEmpty();
        }

        /**
         * Gets {@link #associated} for direct modification, creating an empty list if there was none.
         * @return the live (never null) list of associated entries
         */
        public List<ObTextEntry> openAssociated()
        {
            if(associated == null)
                associated = new ArrayList<>(16);
            return associated;
        }

        /**
         * Gets the primary String of the first associated entry.
         * @return the first child's primary String, or null if there are no children or the first child is null
         */
        public String firstAssociatedString()
        {
            ObTextEntry got;
            if(associated == null || associated.isEmpty() || (got = associated.get(0)) == null)
                return null;
            return got.primary;
        }

        /**
         * Gets the primary Strings of the direct children of this entry, without descending any further.
         * @return a new ArrayList of the children's primary Strings, in order; empty if there are no children
         */
        public ArrayList<String> allAssociatedStrings()
        {
            if(associated == null || associated.isEmpty())
                return new ArrayList<>(1);
            int sz = associated.size();
            ArrayList<String> strings = new ArrayList<>(sz);
            for (int i = 0; i < sz; i++) {
                strings.add(associated.get(i).primary);
            }
            return strings;
        }

        /**
         * Gets the primary Strings of every entry below this one. Each child's primary String is followed by the
         * Strings of that child's own associated entries (recursively) before the next child is visited.
         * @return a new ArrayList of all descendant primary Strings; empty if there are no children
         */
        public ArrayList<String> shallowContents()
        {
            if(associated == null || associated.isEmpty())
                return new ArrayList<>(1);
            int sz = associated.size();
            ArrayList<String> strings = new ArrayList<>(sz);
            iterate(strings, associated);
            return strings;
        }

        /**
         * Gets the first associated entry itself rather than just its String.
         * @return the first child entry, or null if there are no children
         */
        public ObTextEntry firstAssociatedEntry()
        {
            if(associated == null || associated.isEmpty())
                return null;
            return associated.get(0);
        }

        /**
         * Two entries are equal when they have the same class, equal primary Strings, and equal associated lists.
         * A null list is only equal to another null list, not to an empty one. A null primary is only equal to
         * another null primary.
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;

            ObTextEntry entry = (ObTextEntry) o;

            // primary is null after the no-argument constructor, so it can't be dereferenced directly here.
            return Objects.equals(primary, entry.primary) && Objects.equals(associated, entry.associated);
        }

        /**
         * Computes a 64-bit hash from the primary String (via {@link CrossHash#hash64}) and, in order, the
         * 64-bit hashes of all associated entries.
         * @return a 64-bit hash of this entry and everything below it
         */
        public long hash64() {
            long result = CrossHash.hash64(primary), z = 0x60642E2A34326F15L;
            if(associated == null)
                return result ^ z;
            final int len = associated.size();
            result ^= len;
            for (int i = 0; i < len; i++) {
                result ^= associated.get(i).hash64() * (z += 0xC6BC279692B5CC86L);
                // rotate left by 11 so the position of each child affects the result
                result = (result << 11 | result >>> 53);
            }
            result += (z ^ z >>> 26) * 0x632BE59BD9B4E019L;
            result = (result ^ result >>> 33) * 0xFF51AFD7ED558CCDL;
            return ((result ^ result >>> 33) * 0xC4CEB9FE1A85EC53L);
        }

        /**
         * The low 32 bits of {@link #hash64()}.
         */
        @Override
        public int hashCode() {
            return (int)hash64();
        }
    }

    /**
     * The full ObText grammar: raw strings, quoted strings, the comment forms, bare words, and the open and close
     * brackets. The named groups "s", "o" and "c" are looked up as {@link #stringId}, {@link #openId} and
     * {@link #closeId}.
     */
    public static final Pattern pattern = Pattern.compile(
            "(?>'''(?:[\n\u000C\f\r\u0085\u2028\u2029]|\r\n)?({=s}.*?)(?:[\n\u000C\f\r\u0085\u2028\u2029]|\r\n)?''')" +
                    "|(?>\\[\\[({=q}[^\\[\\]]*)\\[(?:[\n\u000C\f\r\u0085\u2028\u2029]|\r\n)?({=s}.*?)(?:[\n\u000C\f\r\u0085\u2028\u2029]|\r\n)?\\]{\\q}\\]\\])" +
                    "|(?>({=q}[\"'])({=s}.*?)(?<!\\\\){\\q})" +
                    "|(?>(?>//|#)(?>\\V*))" +
                    "|(?>/\\*(?:.*?)\\*/)" +
                    "|(?>/\\[({=q}\\S*)/(?:.*?)/{\\q}\\]/)" +
                    "|({=s}[^\\s\\[\\]\"'#\\\\]+)" +
                    "|({=o}\\[)" +
                    "|({=c}\\])", REFlags.DOTALL | REFlags.UNICODE
    ),
    /**
     * A reduced grammar used by {@link #makeMatcherNoComments(CharSequence)}: it has only the raw-string,
     * quoted-string and bare-word alternatives. The three comment alternatives and the bracket alternatives of
     * {@link #pattern} are left out, and '#' is permitted inside bare words.
     */
    patternRelaxed = Pattern.compile(
            "(?>'''(?:[\n\u000C\f\r\u0085\u2028\u2029]|\r\n)?({=s}.*?)(?:[\n\u000C\f\r\u0085\u2028\u2029]|\r\n)?''')" +
                    "|(?>\\[\\[({=q}[^\\[\\]]*)\\[(?:[\n\u000C\f\r\u0085\u2028\u2029]|\r\n)?({=s}.*?)(?:[\n\u000C\f\r\u0085\u2028\u2029]|\r\n)?\\]{\\q}\\]\\])" +
                    "|(?>({=q}[\"'])({=s}.*?)(?<!\\\\){\\q})" +
                    "|({=s}[^\\s\\[\\]\"'\\\\]+)"
            , REFlags.DOTALL | REFlags.UNICODE
    );

    /**
     * Group ids in {@link #pattern} for a found String ("s"), an opening bracket ("o") and a closing bracket ("c").
     */
    public static final int stringId = pattern.groupId("s"),
            openId = pattern.groupId("o"), closeId = pattern.groupId("c");

    // illegalBareWord finds anything that stops text from being written as a bare word in full ObText;
    // reallyIllegalBareWord is the same check without the comment-starting sequences;
    // needsRaw finds an un-escaped square bracket or a trailing backslash, which force a raw string.
    protected static final Pattern illegalBareWord = Pattern.compile("[\\s\\[\\]\"'#\\\\]|(?:/[/\\*])"),
            reallyIllegalBareWord = Pattern.compile("[\\s\\[\\]\"'\\\\]"),
            needsRaw = Pattern.compile("(?<!\\\\)[\\[\\]]|\\\\$");
    // Shared, mutable matchers: every use re-targets them, so they must not be used by two threads at once.
    protected static final Matcher m = pattern.matcher();
    protected static final Matcher bare = illegalBareWord.matcher(), raw = needsRaw.matcher(),
            reallyBare = reallyIllegalBareWord.matcher();

    /**
     * Makes an empty ObText.
     */
    public ObText()
    {

    }

    /**
     * Makes an ObText and fills it by calling {@link #parse(CharSequence)} on {@code text}.
     * @param text a CharSequence (such as a String) using ObText formatting
     */
    public ObText(CharSequence text)
    {
        parse(text);
    }

    /**
     * Parses the given text (a String or other CharSequence) and appends it into this ObText.
     * This re-targets a static Matcher shared by all ObText objects, so it is not safe to call concurrently from
     * more than one thread.
     * @param text a CharSequence (such as a String) using ObText formatting, as described in this class' JavaDocs
     * @return this ObText object after appending the parsed text, for chaining
     * @throws UnsupportedOperationException if an opening bracket appears before any String has been read, or if
     * there are more closing brackets than opening brackets
     */
    public ObText parse(CharSequence text)
    {
        m.setTarget(text);
        ObTextEntry current = null;
        List<ObTextEntry> ls = this;
        // nesting holds, for each open depth, the index of the most recently added entry at that depth
        IntVLA nesting = new IntVLA(4);
        nesting.add(-1);
        int depth = 0;
        while (m.find()) {
            if (m.isCaptured(stringId)) {
                ls.add(current = new ObTextEntry(m.group(stringId)));
                nesting.incr(depth, 1);
            }
            else if(m.isCaptured(openId))
            {
                if(current == null) throw new UnsupportedOperationException("ObText entries can't have associated items without a primary String.");
                nesting.add(-1);
                ls = current.openAssociated();
                depth++;
            }
            else if(m.isCaptured(closeId))
            {
                if(nesting.size <= 1) throw new UnsupportedOperationException("Associated item sequences in ObText can't end more times than they start.");
                nesting.pop();
                depth--;
                // walk back down from the top level along the recorded indices to find the list we returned to
                ls = this;
                for (int i = 0; i < depth; i++) {
                    ls = ls.get(nesting.get(i)).associated;
                }
            }
        }
        return this;
    }

    /**
     * Inserts the given String element at the specified position in this ObText's top level.
     * Shifts the element currently at that position (if any) and any subsequent
     * elements to the right (adds one to their indices).
     * @param index index at which the specified element is to be inserted
     * @param text String element to be inserted, without any associated entries
     */
    public void add(int index, String text) {
        super.add(index, new ObTextEntry(text));
    }

    /**
     * Appends the given String element to the end of this ObText at the top level.
     * @param text String element to be inserted, without any associated entries
     * @return {@code true} (this always modifies the ObText)
     */
    public boolean add(String text) {
        return super.add(new ObTextEntry(text));
    }

    /**
     * Computes a 64-bit hash from the size of this ObText and, in order, {@link ObTextEntry#hash64()} of each
     * top-level entry.
     * @return a 64-bit hash of all content in this ObText
     */
    public long hash64()
    {
        long result = 0x1A976FDF6BF60B8EL, z = 0x60642E2A34326F15L;
        final int len = size();
        result ^= len;
        for (int i = 0; i < len; i++) {
            result ^= get(i).hash64() * (z += 0xC6BC279692B5CC86L);
            // rotate left by 11 so the position of each entry affects the result
            result = (result << 11 | result >>> 53);
        }
        result += (z ^ z >>> 26) * 0x632BE59BD9B4E019L;
        result = (result ^ result >>> 33) * 0xFF51AFD7ED558CCDL;
        return ((result ^ result >>> 33) * 0xC4CEB9FE1A85EC53L);
    }

    /**
     * The low 32 bits of {@link #hash64()}.
     */
    @Override
    public int hashCode() {
        return (int)hash64();
    }

    // Used to generate randomized delimiters using up to 9 non-English letters.
    // Call while assigning your state with randomChars(state += 0x9E3779B97F4A7C15L, myChars) ;
    // that assumes you have a 9-element char[] called myChars .
    // As long as z/state is deterministic (i.e. based on a hash), this should be too.
    // Each char is picked from indices 128 through 255 of ArrayTools.letters using 7 bits of the scrambled z.
    private static void randomChars(long z, char[] mut)
    {
        z = (z ^ (z >>> 30)) * 0xBF58476D1CE4E5B9L;
        z = (z ^ (z >>> 27)) * 0x94D049BB133111EBL;
        z ^= (z >>> 31);

        mut[0] = letters[(int)(128 + (z & 127))];
        mut[1] = letters[(int)(128 + (z >>> 7 & 127))];
        mut[2] = letters[(int)(128 + (z >>> 14 & 127))];
        mut[3] = letters[(int)(128 + (z >>> 21 & 127))];
        mut[4] = letters[(int)(128 + (z >>> 28 & 127))];
        mut[5] = letters[(int)(128 + (z >>> 35 & 127))];
        mut[6] = letters[(int)(128 + (z >>> 42 & 127))];
        mut[7] = letters[(int)(128 + (z >>> 49 & 127))];
        mut[8] = letters[(int)(128 + (z >>> 56 & 127))];
    }

    /**
     * Appends {@code text} to {@code sb}, quoting it only if it can't be written as a bare word under the relaxed
     * rules (where comment markers such as '#' are allowed in bare words). Uses a shared static Matcher, so this
     * is not safe to call concurrently from more than one thread.
     * @param sb the StringBuilder to append to
     * @param text the String to append, quoted as needed; null or empty is written as {@code ''}
     */
    public static void appendQuoted(StringBuilder sb, String text)
    {
        appendQuoted(sb, text, reallyBare);
    }

    /**
     * Appends {@code text} to {@code sb}, quoting it only if it can't be written as a bare word under the full
     * ObText rules (where '#', "//" and the start of a C-style block comment also force quoting). Uses a shared
     * static Matcher, so this is not safe to call concurrently from more than one thread.
     * @param sb the StringBuilder to append to
     * @param text the String to append, quoted as needed; null or empty is written as {@code ''}
     */
    public static void appendQuotedObText(StringBuilder sb, String text)
    {
        appendQuoted(sb, text, bare);
    }

    /**
     * Appends {@code text} to {@code sb} using the least elaborate quoting that fits: a bare word if
     * {@code bareFinder} finds nothing illegal, otherwise single quotes, double quotes, a triple-apostrophe raw
     * string, or a delimited raw string when the text itself contains triple apostrophes.
     * @param sb the StringBuilder to append to
     * @param text the String to append, quoted as needed; null or empty is written as {@code ''}
     * @param bareFinder a Matcher that finds any content that prevents {@code text} from being a bare word
     */
    protected static void appendQuoted(StringBuilder sb, String text, Matcher bareFinder)
    {
        if(text == null || text.isEmpty()) {
            sb.append("''");
            return;
        }
        bareFinder.setTarget(text);
        if(!bareFinder.find())
            sb.append(text);
        else
        {
            raw.setTarget(text);
            if(raw.find()) {
                // un-escaped brackets or a trailing backslash: only a raw string can hold this text
                if (text.contains("'''")) {
                    long state = CrossHash.hash64(text);
                    char[] myChars = new char[9];
                    int count;
                    do {
                        randomChars(state += 0x9E3779B97F4A7C15L, myChars);
                        count = StringKit.containsPart(text, myChars, "]", "]]");
                    } while (count == 12);
                    appendDelimitedRaw(sb, text, myChars, count);
                } else {
                    sb.append("'''\n").append(text).append("\n'''");
                }
            }
            else if(!text.contains("'"))
            {
                sb.append('\'').append(text).append('\'');
            }
            else
            {
                if(text.contains("\""))
                {
                    if(text.contains("'''"))
                    {
                        long state = CrossHash.hash64(text);
                        char[] myChars = new char[9];
                        int count;
                        do
                        {
                            randomChars(state += 0x9E3779B97F4A7C15L, myChars);
                            count = StringKit.containsPart(text, myChars);
                        }while(count == 9);
                        appendDelimitedRaw(sb, text, myChars, count);
                    }
                    else
                    {
                        sb.append("'''\n").append(text).append("\n'''");
                    }
                }
                else
                {
                    sb.append('"').append(text).append('"');
                }
            }
        }
    }

    // Writes text as a delimited raw string, using the first count chars of delimiter on both sides.
    // count is clamped to the array length: the prefixed search above only rejects a result of 12, so any other
    // result larger than the 9-element array would otherwise make StringBuilder.append throw.
    // NOTE(review): the exact range of values StringKit.containsPart can return is not visible here; confirm.
    private static void appendDelimitedRaw(StringBuilder sb, String text, char[] delimiter, int count)
    {
        final int used = Math.min(count, delimiter.length);
        sb.append("[[").append(delimiter, 0, used).append("[\n").append(text).append("\n]")
                .append(delimiter, 0, used).append("]]");
    }

    /**
     * Gets the serialized form from {@link #serializeToString()}, wrapped in a readable prefix and suffix.
     * @return "ObText object: [[[[", a newline, the serialized content, a newline, and "]]]]"
     */
    @Override
    public String toString() {
        return "ObText object: [[[[\n" + serializeToString() + "\n]]]]";
    }

    /**
     * Writes every entry in this ObText, with nested associated entries inside square brackets, as text that
     * uses ObText formatting. Each primary String is quoted as needed and followed by a newline.
     * @return a String holding all content of this ObText
     */
    public String serializeToString()
    {
        StringBuilder sb = new StringBuilder(100);
        iterate(sb, this);
        return sb.toString();
    }

    /**
     * Deserializes an ObText that was serialized by {@link #serializeToString()} or {@link #toString()}, and will
     * ignore the prefix and suffix that toString appends for readability (these are "ObText object: [[[["
     * followed by a newline, and a newline followed by "]]]]", for reference). The suffix is only removed when
     * it is actually present after the prefix, so text that has the prefix but was truncated is parsed as far as
     * it goes instead of failing. This is otherwise the same as calling the constructor
     * {@link #ObText(CharSequence)}.
     * @param data a String that is usually produced by serializeToString or toString on an ObText
     * @return a new ObText produced by parsing data (disregarding any prefix or suffix from toString() )
     */
    public static ObText deserializeFromString(String data)
    {
        final String prefix = "ObText object: [[[[\n", suffix = "\n]]]]";
        if(data.startsWith(prefix))
        {
            int end = data.length();
            // the suffix must lie entirely after the prefix; otherwise there is nothing to strip from the end
            if(end - prefix.length() >= suffix.length() && data.endsWith(suffix))
                end -= suffix.length();
            return new ObText(data.substring(prefix.length(), end));
        }
        return new ObText(data);
    }

    // Recursively writes each entry's quoted primary String on its own line, followed by a bracketed,
    // newline-separated block for its associated entries when it has any.
    private static void iterate(StringBuilder sb, ArrayList<ObTextEntry> obt)
    {
        int len = obt.size();
        ObTextEntry entry;
        for (int i = 0; i < len; i++) {
            appendQuotedObText(sb, (entry = obt.get(i)).primary);
            sb.append('\n');
            if(entry.hasAssociated())
            {
                sb.append("[\n");
                iterate(sb, entry.associated);
                sb.append("]\n");
            }
        }
    }

    // Recursively collects primary Strings into buffer: each entry first, then everything associated with it.
    private static void iterate(ArrayList<String> buffer, ArrayList<ObTextEntry> obt)
    {
        int len = obt.size();
        ObTextEntry entry;
        for (int i = 0; i < len; i++) {
            buffer.add((entry = obt.get(i)).primary);
            if(entry.hasAssociated())
            {
                iterate(buffer, entry.associated);
            }
        }
    }

    /**
     * Gets all Strings from the top level of this ObText, not including any associated values, and puts them in
     * an {@link ArrayList} of String. The returned list will retain the same order the Strings were entered in, and
     * unlike {@link #keySet()} or {@link #keyOrderedSet()}, duplicate keys will all be preserved. Changes to the
     * returned List won't be reflected in this ObText.
     * @return all top-level Strings (without associated values) as an ArrayList of String
     */
    public ArrayList<String> keyList()
    {
        final int sz = size();
        ArrayList<String> keys = new ArrayList<>(sz);
        for (int i = 0; i < sz; i++) {
            keys.add(get(i).primary);
        }
        return keys;
    }

    /**
     * Gets all unique Strings from the top level of this ObText, not including any associated values, and puts them in
     * an {@link OrderedSet} of String. The returned set will retain the same order the Strings were entered in, and you
     * can use OrderedSet methods like {@link OrderedSet#getAt(int)} to look up keys by index. Changes to the returned
     * Set won't be reflected in this ObText.
     * @return all unique top-level Strings (without associated values) as an OrderedSet of String keys
     */
    public OrderedSet<String> keyOrderedSet()
    {
        final int sz = size();
        OrderedSet<String> keys = new OrderedSet<>(sz);
        for (int i = 0; i < sz; i++) {
            keys.add(get(i).primary);
        }
        return keys;
    }

    /**
     * Gets all unique Strings from the top level of this ObText, not including any associated values, and puts them in
     * a {@link HashSet} of String. The returned set won't be insertion-ordered or necessarily retain the same order the
     * Strings were entered in; use {@link #keyOrderedSet()} if you want this. Changes to the returned Set won't be
     * reflected in this ObText.
     * @return all unique top-level Strings (without associated values) as a HashSet of String keys
     */
    public HashSet<String> keySet()
    {
        final int sz = size();
        HashSet<String> keys = new HashSet<>(sz, 0.25f);
        for (int i = 0; i < sz; i++) {
            keys.add(get(i).primary);
        }
        return keys;
    }

    /**
     * Gets all unique Strings from the top level of this ObText as keys in an {@link OrderedMap}, with the first String
     * associated with each key as its value (or null if nothing is associated with a key String). The returned map will
     * retain the same order the keys were entered in, and you can use OrderedMap methods like
     * {@link OrderedMap#keyAt(int)} to look up keys by index or {@link OrderedMap#getAt(int)} to look up value String
     * by index. Changes to the returned Map won't be reflected in this ObText.
     * @return an OrderedMap of unique String keys associated with the first associated String for each key (or null)
     */
    public OrderedMap<String, String> basicOrderedMap()
    {
        final int sz = size();
        OrderedMap<String, String> keys = new OrderedMap<>(sz);
        ObTextEntry got;
        for (int i = 0; i < sz; i++) {
            got = get(i);
            keys.put(got.primary, got.firstAssociatedString());
        }
        return keys;
    }

    /**
     * Gets all unique Strings from the top level of this ObText as keys in a {@link HashMap}, with the first
     * String associated with each key as its value (or null if nothing is associated with a key String). The returned
     * map won't be insertion-ordered or necessarily retain the same order the Strings were entered in; use
     * {@link #basicOrderedMap()} if you want this. Changes to the returned Map won't be reflected in this ObText.
     * @return a HashMap of unique String keys associated with the first associated String for each key (or null)
     */
    public HashMap<String, String> basicMap()
    {
        final int sz = size();
        HashMap<String, String> keys = new HashMap<>(sz, 0.25f);
        ObTextEntry got;
        for (int i = 0; i < sz; i++) {
            got = get(i);
            keys.put(got.primary, got.firstAssociatedString());
        }
        return keys;
    }

    /**
     * Gets all unique Strings from the top level of this ObText as keys in an {@link OrderedMap}, with any Strings
     * associated with those keys as their values (in a possibly-empty ArrayList of String for each value).
     * The returned map will retain the same order the keys were entered in, and you can use OrderedMap methods like
     * {@link OrderedMap#keyAt(int)} to look up keys by index or {@link OrderedMap#getAt(int)} to look up the ArrayList
     * of value Strings by index. Changes to the returned Map won't be reflected in this ObText.
     * @return an OrderedMap of unique String keys associated with ArrayList values containing associated Strings
     */
    public OrderedMap<String, ArrayList<String>> shallowOrderedMap()
    {
        final int sz = size();
        OrderedMap<String, ArrayList<String>> keys = new OrderedMap<>(sz);
        ObTextEntry got;
        for (int i = 0; i < sz; i++) {
            got = get(i);
            keys.put(got.primary, got.allAssociatedStrings());
        }
        return keys;
    }

    /**
     * Gets all unique Strings from the top level of this ObText as keys in a {@link HashMap}, with any Strings
     * associated with those keys as their values (in a possibly-empty ArrayList of String for each value).
     * The returned map won't be insertion-ordered or necessarily retain the same order the Strings were entered in; use
     * {@link #shallowOrderedMap()} if you want this. Changes to the returned Map won't be reflected in this ObText.
     * @return a HashMap of unique String keys associated with ArrayList values containing associated Strings
     */
    public HashMap<String, ArrayList<String>> shallowMap()
    {
        final int sz = size();
        HashMap<String, ArrayList<String>> keys = new HashMap<>(sz, 0.25f);
        ObTextEntry got;
        for (int i = 0; i < sz; i++) {
            got = get(i);
            keys.put(got.primary, got.allAssociatedStrings());
        }
        return keys;
    }

    /**
     * Can be used to help reading sequences of Strings with ObText-style quotation marking their boundaries.
     * This returns a {@link ContentMatcher} object that you must call setTarget on before using it.
     * The argument(s) to setTarget should be the text that might contain quotes, heredoc-style quotes, or just bare
     * words. Calling {@link ContentMatcher#find()} will try to find the next String, returning false if there's nothing
     * left or returning true and advancing the search if a String was found. The String might be a special term in some
     * cases, like "[" and "]" without quotes being syntax in ObText that don't contain usable Strings. That's why,
     * after a String was found with find(), you should check {@link ContentMatcher#hasMatch()} to verify that a match
     * was successful, and if that's true, then you can call {@link ContentMatcher#getMatch()} to get the un-quoted
     * contents of the next String in the target.
     * @return a {@link ContentMatcher} that must have one of its setTarget() methods called before it can be used
     */
    public static ContentMatcher makeMatcher()
    {
        return new ContentMatcher();
    }

    /**
     * Can be used to help reading sequences of Strings with ObText-style quotation marking their boundaries.
     * This returns a {@link ContentMatcher} object that is already configured to read from {@code text}.
     * The {@code text} should contain Strings that may be surrounded by quotes, heredoc-style quotes, or just bare
     * words. Calling {@link ContentMatcher#find()} will try to find the next String, returning false if there's nothing
     * left or returning true and advancing the search if a String was found. The String might be a special term in some
     * cases, like "[" and "]" without quotes being syntax in ObText that don't contain usable Strings. That's why,
     * after a String was found with find(), you should check {@link ContentMatcher#hasMatch()} to verify that a match
     * was successful, and if that's true, then you can call {@link ContentMatcher#getMatch()} to get the un-quoted
     * contents of the next String in the target.
     * @param text the target String that should probably have at least one sub-string that might be quoted
     * @return a {@link ContentMatcher} that can be used immediately by calling {@link ContentMatcher#find()}
     */
    public static ContentMatcher makeMatcher(CharSequence text)
    {
        return new ContentMatcher(text);
    }

    /**
     * Can be used to help reading sequences of Strings with ObText-style quotation marking their boundaries, but no
     * comments (which allows some additional characters to be used in bare words, like '#').
     * This returns a {@link ContentMatcher} object that is already configured to read from {@code text}.
     * The {@code text} should contain Strings that may be surrounded by quotes, heredoc-style quotes, or just bare
     * words. Calling {@link ContentMatcher#find()} will try to find the next String, returning false if there's nothing
     * left or returning true and advancing the search if a String was found. Unlike the ContentMatcher produced by
     * {@link #makeMatcher(CharSequence)}, you can call {@link ContentMatcher#getMatch()} after any successful call to
     * {@link ContentMatcher#find()}, which will get the un-quoted contents of the next String in the target.
     * @param text the target String that should probably have at least one sub-string that might be quoted
     * @return a {@link ContentMatcher} that can be used immediately by calling {@link ContentMatcher#find()}
     */
    public static ContentMatcher makeMatcherNoComments(CharSequence text)
    {
        return new ContentMatcher(text, patternRelaxed);
    }

    /**
     * A {@link Matcher} preconfigured with an ObText pattern, with {@link #hasMatch()} and {@link #getMatch()}
     * added to read the un-quoted contents of whatever {@link #find()} found last.
     */
    public static class ContentMatcher extends Matcher {

        /**
         * Constructs a ContentMatcher that will need to have its target set with {@link #setTarget(CharSequence)} or
         * one of its overloads. The target should contain multiple substrings that may have quotation around them; this
         * class is meant to skip the quotation in ObText's style.
         */
        public ContentMatcher()
        {
            super(pattern);
        }

        /**
         * Constructs a ContentMatcher that already has its target set to {@code text}.
         * @param text the CharSequence, such as a String, to find possibly-quoted Strings in.
         */
        public ContentMatcher(CharSequence text)
        {
            super(pattern, text);
        }

        /**
         * Constructs a ContentMatcher that already has its target set to {@code text} and uses an alternate Pattern.
         * @param text the CharSequence, such as a String, to find possibly-quoted Strings in.
         * @param altPattern the Pattern to use instead of {@link ObText#pattern}
         */
        ContentMatcher(CharSequence text, Pattern altPattern)
        {
            super(altPattern, text);
        }

        /**
         * Supplies a text to search in/match with.
         * Resets current search position to zero.
         *
         * @param text the data source to search
         * @see Matcher#setTarget(Matcher, int)
         * @see Matcher#setTarget(CharSequence, int, int)
         * @see Matcher#setTarget(char[], int, int)
         * @see Matcher#setTarget(Reader, int)
         */
        @Override
        public void setTarget(CharSequence text) {
            super.setTarget(text);
        }

        /**
         * Supplies a text to search in/match with, as a part of String.
         * Resets current search position to zero.
         *
         * @param text the data source to search
         * @param start where the target starts
         * @param len how long the target is
         * @see Matcher#setTarget(Matcher, int)
         * @see Matcher#setTarget(CharSequence)
         * @see Matcher#setTarget(char[], int, int)
         * @see Matcher#setTarget(Reader, int)
         */
        @Override
        public void setTarget(CharSequence text, int start, int len) {
            super.setTarget(text, start, len);
        }

        /**
         * Supplies a text to search in/match with, as a part of char array.
         * Resets current search position to zero.
         *
         * @param text the data source to search
         * @param start where the target starts
         * @param len how long the target is
         * @see Matcher#setTarget(Matcher, int)
         * @see Matcher#setTarget(CharSequence)
         * @see Matcher#setTarget(CharSequence, int, int)
         * @see Matcher#setTarget(Reader, int)
         */
        @Override
        public void setTarget(char[] text, int start, int len) {
            super.setTarget(text, start, len);
        }

        /**
         * Returns true if {@link #find()} has returned true and the found text is a usable String (not some syntax).
         * If this returns true, you can reasonably get a (possibly empty) String using {@link #getMatch()}.
         * @return true if there is a usable String found that can be obtained with {@link #getMatch()}
         */
        public boolean hasMatch()
        {
            return isCaptured(stringId);
        }

        /**
         * Returns the contents of the latest String successfully found with {@link #find()}, without quotation.
         * You should typically call {@link #hasMatch()} even if find() has returned true, to ensure there is a valid
         * String that can be acquired (this will return an empty String if hasMatch() returns false, but an empty
         * String is also potentially a valid result in a successful match, so it should be distinguished).
         * @return the contents of the latest String successfully found with {@link #find()}
         */
        public String getMatch()
        {
            return group(stringId);
        }
    }

}